---
title: "All_Code_Analysis_Plotting"
author: "Omar Hammoud Gallego, Roberto Foa, Xavier Romero-Vidal"
date: "26/04/2023"
output: html_document
---

```{r setup, include=FALSE}
# Document-wide chunk defaults: do not echo code, messages or warnings.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
```

```{r setup and upload packages, include=FALSE, results = FALSE}
library(tm)
library(tidytext)
library(dplyr)
library(SnowballC)
library(ggplot2)
library(textdata)
library(topicmodels)
library("tidyverse")
#install.packages("Twitmo")
library("Twitmo")

library(scales)
library(quanteda)

library(sjlabelled)

#install.packages("reshape")
library(reshape)
library(tidyverse)
library(gdata)
#install.packages("aod")
library("aod")

#install.packages("DataCombine")
library("DataCombine")
library(lubridate)

#install.packages("srvyr")
library(srvyr)

#install.packages("dotwhisker")
library("dotwhisker")
library(lme4)

#install.packages("parameters")  # to calculate ci_method="wald"
library("parameters")

#install.packages("stm")
library("stm")

#install.packages("see")  # Set of packages to visualise models
library("see")
#rm(list=ls()) to clean environment

# packages for multi level model
library(cowplot)
library(haven)
library(lme4)
library(car)
library(arm)
library(dplyr)
library(MuMIn)
library(lmerTest)
library(estimatr)
library("foreign")
library(stargazer)
library(jtools)

#install.packages("survey")
library(survey)

library("naniar")
library(gtsummary)
library(kableExtra)

```

# Plot Tweets Trends

```{r, upload data to see trends in tweets, eval= FALSE}

# Project folder (Omar, Mac).
setwd("/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/")

# Project folder (Omar, Windows):
# setwd("C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project")

# Classified tweets; string columns are read in as factors.
Tweets <- read.csv(
  file = "All_Tweets_Classified_Sentiment.csv",
  stringsAsFactors = TRUE
)

# Xavi:
# Tweets <- read.csv("/Volumes/GoogleDrive/Other computers/My Laptop/Documents/_University/2021 Cambridge/_Project/_Tracker/Data/All_Tweets_Classified_Sentiment.csv", stringsAsFactors = TRUE)


```


```{r, select 3 random tweets for each category, eval= FALSE}

# Keep only the columns shown in the example table.
# `select()` is namespaced on purpose: the setup chunk loads `arm` after
# `dplyr`, and `arm` attaches MASS, whose own `select()` then masks dplyr's.
Tweets_Ex <- Tweets %>%
  dplyr::select(text, newspaper, topic)

# summary(Tweets_Ex$topic)

# Fixed seed so the same tweets are drawn on every run.
set.seed(35)

# Three randomly drawn tweets per topic.
Tweets_Ex_Sample <- Tweets_Ex %>%
  group_by(topic) %>%
  slice_sample(n = 3)

library(knitr)
# Draw Table  booktabs= T,
Tweets_Ex_Sample %>%
  kbl(
    caption = "Random Selection of Three Tweets for each Topic",
    col.names = c("Text of Tweet", "Newspaper", "Topic"),
    format = "html"
  ) %>%
  kable_classic()



```




```{r, Salience of various topics plot, eval= FALSE}




# glimpse(Tweets)

# Count tweets per day and topic, then express each count as a percentage of
# all tweets posted on that day. The result stays grouped by day.
Tweets_Sum <- Tweets %>%
  group_by(created_at.x, topic) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  group_by(created_at.x) %>%
  mutate(perc = 100 * n / sum(n))

library(lubridate)

# The day column is read in as a factor; convert it to Date.
Tweets_Sum$created_at.x <- as.Date(as.character(Tweets_Sum$created_at.x))

# Average the daily percentages within each 7-day bin and topic. mutate() keeps
# one row per day, so the weekly value is repeated on every day of the bin.
Tweets_Sum <- Tweets_Sum %>%
  dplyr::group_by(week = floor_date(created_at.x, "7 days"), topic) %>%
  mutate(perc_week_topic = mean(perc))

summary(Tweets_Sum$topic)

# Human-readable topic labels for the facet strips.
Tweets_Sum <- Tweets_Sum %>%
  mutate(
    topic = fct_recode(
      topic,
      "Climate Change"     = "Climate_Change",
      "Daily News"         = "Daily_News",
      "Entertainment"      = "Enterteinment",
      "Animals and Pets"   = "Puppy_News",
      "Royal Family"       = "Royal_Family",
      "UK Politics"        = "Uk_Politics",
      "US Politics"        = "US_Politics",
      "Political Violence" = "Violence"
    )
  )

# One panel per topic showing the weekly share of tweets over time.
ALL_Tweets_Topics <- ggplot(Tweets_Sum, aes(x = week, y = perc_week_topic)) +
  # geom_area(alpha = 0.9) +
  geom_line() +
  facet_wrap(~topic, ncol = 3) +
  labs(
    title = "Average Issue Salience across Newspapers",
    x = "",
    y = "Percentage of Tweets each week"
  ) +
  theme_bw() +
  scale_x_date(labels = date_format("%b %y")) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 25)
  )

ALL_Tweets_Topics

# for mac
setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz/")

ggsave("ALL_Tweets_Topics.png", plot = ALL_Tweets_Topics, width = 10, height = 12)

```

```{r, check topic predictive validity, eval = FALSE}

# Spot checks: inspect the tweets assigned to a topic on a given day.
summary(Tweets$topic)

Tweets %>%
  filter(topic == "Royal_Family", created_at.x == "2020-02-19")

Tweets %>%
  filter(topic == "Lockdown_Consequences", created_at.x == "2021-04-19")

```


# Multi-Level Model
```{r, Upload combined_survey data}


# Earlier inputs and alternative working directories, kept for reference:
# Omar Windows upload.
# setwd("C:/Users/omarh/Documents/GitHub/Satisfaction_Democracy/data-raw/You_Gov/")
# setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-raw/You_Gov/")
# You_Gov <- read.csv("You_Gov_Dec_2021.csv")

# For Windows
# setwd("C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project")

# For Mac
setwd("/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/")

# Survey responses merged with the tweet measures; strings become factors.
Mod_Dataset <- read.csv(
  file = "combined_survey_tweets.csv",
  stringsAsFactors = TRUE
)

# Make "Did not vote" the reference level of `lastvote`.
Mod_Dataset$lastvote <- relevel(Mod_Dataset$lastvote, ref = "Did not vote")


```

```{r, preparation You Gov Data for merging Updated 26 04 2022, eval= FALSE }

# Treat the coded survey answers as categorical variables.
You_Gov[["CAM_demtrack_satisfied"]] <- as.factor(You_Gov[["CAM_demtrack_satisfied"]])
You_Gov[["pastvote_EURef"]] <- as.factor(You_Gov[["pastvote_EURef"]])
You_Gov[["pastvote_ge_2019"]] <- as.factor(You_Gov[["pastvote_ge_2019"]])
You_Gov[["profile_newspaper_readership"]] <- as.factor(You_Gov[["profile_newspaper_readership"]])
You_Gov[["voted_ge_2019"]] <- as.factor(You_Gov[["voted_ge_2019"]])

# Interview start: text -> date-time (GMT) -> calendar date.
You_Gov$starttime <- You_Gov$starttime %>%
  as.character() %>%
  strptime(format = "%Y-%m-%d %H:%M:%OS", tz = "GMT") %>%
  as.POSIXct() %>%
  as.character() %>%
  as.Date()

# Keep respondents with valid answer codes on the variables used later.
You_Gov <- You_Gov %>%
  dplyr::filter(
    CAM_demtrack_satisfied %in% c("1", "2", "3", "4"),
    pastvote_EURef %in% c("1", "2", "3"),
    # pastvote_ge_2019 %in% c("1", "2", "3", "4"),
    profile_newspaper_readership %in%
      c("2", "3", "5", "6", "7", "8", "9", "10", "16")
  )
    

# TASK 1. Set pastvote_ge_2019 to 95 when voted_ge_2019 == 2.
# Both columns are factors at this point. Converting via as.character()
# recovers the survey codes; as.numeric() applied directly to a factor would
# return the internal level indices instead.
You_Gov$pastvote_ge_2019 <- as.numeric(as.character(You_Gov$pastvote_ge_2019))
You_Gov$voted_ge_2019 <- as.numeric(as.character(You_Gov$voted_ge_2019))

# NAs as 0s
You_Gov$pastvote_ge_2019[is.na(You_Gov$pastvote_ge_2019)] <- 0

# Overwrite the party code wherever the turnout question was answered with 2.
# %in% treats a missing turnout answer as "no match".
You_Gov <- You_Gov %>%
  mutate(pastvote_ge_2019 = replace(pastvote_ge_2019, voted_ge_2019 %in% 2, 95))

# Back to factors for the recoding step that follows.
You_Gov$pastvote_ge_2019 <- as.factor(You_Gov$pastvote_ge_2019)
You_Gov$voted_ge_2019 <- as.factor(You_Gov$voted_ge_2019)

# To double check that replacement was correctly done
# You_Gov %>% select(voted_ge_2019, pastvote_ge_2019) %>% filter(voted_ge_2019 == "2")

# summary(as.factor(You_Gov$voted_ge_2019))
# summary(as.factor(You_Gov$pastvote_ge_2019))

library(dplyr)
# Task 2. Assign to each newspaper a name, and to those who don't read newspapers then BBC News

# Rename all relevant factors to consider in model.
# fct_recode() takes pairs of the form "new label" = "old code".
You_Gov <- You_Gov %>%
  dplyr::mutate(
    CAM_demtrack_satisfied = fct_recode(
      CAM_demtrack_satisfied,
      "Satisfied"     = "1",
      "Satisfied"     = "2",
      "Not Satisfied" = "3",
      "Not Satisfied" = "4"
    )
  ) %>%
  dplyr::mutate(
    pastvote_EURef = fct_recode(
      pastvote_EURef,
      "Voted Remain" = "1",
      "Voted Leave"  = "2",
      "Did not vote" = "3"
    )
  ) %>%
  dplyr::mutate(
    profile_newspaper_readership = fct_recode(
      profile_newspaper_readership,
      "daily_mail"   = "2",
      "daily_mirror" = "3",
      "sun"          = "5",
      "telegraph"    = "6",
      "ft"           = "7",
      "guardian"     = "8",
      "independent"  = "9",
      "times"        = "10",
      "bbc_news"     = "16"
    )
  ) %>%
  dplyr::mutate(
    pastvote_ge_2019 = fct_recode(
      pastvote_ge_2019,
      "Conservatives"           = "1",
      "Labour"                  = "2",
      "Liberal Democrats"       = "3",
      "Scottish National Party" = "4",
      "Did not vote"            = "95"
    )
  )
library(gdata)
# Remove the newspaper levels left empty by the earlier filter.
# The previous call, drop.levels(You_Gov, You_Gov$profile_newspaper_readership),
# handed the column to drop.levels() as its second argument, which gdata
# documents as the logical `reorder` flag rather than a column selector.
You_Gov$profile_newspaper_readership <-
  droplevels(You_Gov$profile_newspaper_readership)


# Double check that
# summary(You_Gov$profile_newspaper_readership)
# summary(as.factor(wide_frame$newspaper))


# Re factor educational level: collapse the numeric education codes into a
# handful of broader categories ("new label" = "old code").
You_Gov <- You_Gov %>%
  mutate(
    profile_education_level = fct_recode(
      as.factor(as.character(profile_education_level)),
      "No_education"               = "1",
      "secondary_education"        = "2",
      "secondary_education"        = "3",
      "secondary_education"        = "4",
      "secondary_education"        = "5",
      "secondary_education"        = "6",
      "secondary_education"        = "7",
      "secondary_education"        = "8",
      "secondary_education"        = "9",
      "secondary_education"        = "10",
      "secondary_education"        = "11",
      "other_higher_qualification" = "12",
      "other_higher_qualification" = "13",
      "other_higher_qualification" = "14",
      "University_level_education" = "15",
      "University_level_education" = "16",
      "University_level_education" = "17",
      "other_higher_qualification" = "18",
      "don't_know"                 = "19",
      "prefer_not_to_say"          = "20"
    )
  )

# Control for education, political attention and other
# political_attention
# profile_education

```


## Regression

```{r, multi-level model}

# Quadratic age term.
Mod_Dataset$age_square <- Mod_Dataset$age^2

# Numeric survey round; used as the grouping variable of the random effects.
Mod_Dataset$round <- as.numeric(Mod_Dataset$week)

# Newspaper leaning: "" (all other outlets), "Left Paper" or "Right Paper".
# The labels are filled in as plain character values and converted to a factor
# only once all of them are set. Converting in between, as was done before,
# left the factor without a "Right Paper" level, so every reader of a
# right-leaning paper was silently turned into NA.
Mod_Dataset$leftpaper <- ""
Mod_Dataset$leftpaper[
  Mod_Dataset$newspaper %in% c("guardian", "daily_mirror", "independent")
] <- "Left Paper"
Mod_Dataset$leftpaper[
  Mod_Dataset$newspaper %in% c("sun", "daily_mail", "times", "telegraph")
] <- "Right Paper"

# "" comes first and is therefore the reference category.
Mod_Dataset$leftpaper <- factor(
  Mod_Dataset$leftpaper,
  levels = c("", "Left Paper", "Right Paper")
)

# 1 for respondents who voted Leave in the EU referendum, 0 otherwise
# (including missing answers).
Mod_Dataset$brexiteer <- as.numeric(Mod_Dataset$pastvote_EURef %in% "Voted Leave")


# RUN Multi-effects Model with random effects for newspaper, last vote by week (round)
# In this model we use random effects for newspaper readership and party vote by week.
# MODEL AS USED BY ROBERTO  (+ brexiteer is currently left out)
Multi_Level <- lmer(
  DEM_SAT ~ age + age_square + profile_gender + factor(profile_GOR) +
    profile_education_level + pastvote_EURef +
    (1 + factor(leftpaper) + factor(lastvote) | round),
  data = Mod_Dataset
)

# summary(Multi_Level)

# Save Multi-level model
saveRDS(Multi_Level, file = "Multi_Level_Model.rds")

# Upload saved multilevel model
Multi_Level <- readRDS(file = "Multi_Level_Model.rds")
```

## Plots Multilevel Model

```{r, Roberto's functions}


#' Plot theme used for the article figures
#'
#' Builds on `theme_bw()`: one font family throughout, horizontal grid lines
#' only, light grey axis lines, no tick marks, no legend, and a white plot
#' background with a grey border.
#'
#' @param base_size Base font size; text elements are drawn at `base_size + 2`.
#' @param font Font family used for all text.
#' @return A ggplot2 theme object that can be added to a plot.
theme_article <- function(base_size = 11, font = "Times New Roman") {
  label_size <- base_size + 2
  plain_text <- element_text(
    size = label_size, colour = "black", face = "plain", family = font
  )
  bold_text <- element_text(
    size = label_size, colour = "black", face = "bold", family = font
  )

  theme_bw(base_size = base_size, base_family = font) +
    theme(
      # clean up
      legend.key = element_blank(),
      strip.background = element_blank(),

      # text
      text = plain_text,
      plot.title = plain_text,
      axis.title = plain_text,
      axis.text = plain_text,
      legend.title = bold_text,
      legend.text = plain_text,

      # grid: horizontal lines only
      panel.grid.major.x = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.y = element_line(size = .5, color = "#CCCCCC"),
      panel.grid.minor.y = element_line(size = .5, color = "#DDDDDD"),

      # axes: thin grey lines, no tick marks
      axis.line.y = element_line(colour = "#CCCCCC", size = 0.3),
      axis.line.x = element_line(colour = "#CCCCCC", size = 0.3),
      axis.ticks = element_blank(),

      # no legend
      legend.position = "none",

      # background
      plot.background = element_rect(fill = "white", colour = "grey", size = 0.5)
    )
}


### 5-point centred rolling average.
#'
#' Each element is replaced by the mean of itself and up to two neighbours on
#' either side. Near an edge the window is shortened rather than padded, so the
#' first element averages positions 1-3, the second positions 1-4, and likewise
#' at the end. Missing values inside a window are ignored.
#'
#' When `country` is supplied, a window never crosses from one run of equal
#' `country` values into the next. The original code meant to do this but wrote
#' the ranges as `i:i+2` and `(i-1):i+2`; because `:` binds tighter than `+`,
#' those selected the wrong elements.
#'
#' @param vector Numeric vector to smooth.
#' @param country Optional vector of the same length as `vector` that labels
#'   each element's group. The default (first element `NA`) treats the whole
#'   vector as one group. Assumed to contain no `NA` when supplied.
#' @return A vector of the same length as `vector` holding the smoothed values.
rolling <- function(vector, country = c(NA, NA)) {
  len <- length(vector)
  smoothed <- vector
  if (len == 0) {
    return(smoothed)
  }

  positions <- seq_len(len)

  # Label consecutive runs of identical `country` values; one run if ungrouped.
  if (!is.na(country[1])) {
    stopifnot(length(country) == len)
    run_id <- cumsum(c(TRUE, country[-1] != country[-len]))
  } else {
    run_id <- rep(1L, len)
  }
  run_start <- ave(positions, run_id, FUN = min)
  run_end <- ave(positions, run_id, FUN = max)

  for (i in positions) {
    # Clamp the +/-2 window to the run that position i belongs to.
    lo <- max(run_start[i], i - 2L)
    hi <- min(run_end[i], i + 2L)
    smoothed[i] <- mean(vector[lo:hi], na.rm = TRUE)
  }

  smoothed
}



## adf - shorthand for as.data.frame
#' @param x Object to convert.
#' @return `x` converted with `as.data.frame()`.
adf <- function(x) {
  as.data.frame(x)
}

## n - shorthand for as.numeric
## Note: this global helper has the same name as dplyr's n(), which the
## tweet-salience chunk calls inside summarise().
#' @param x Object to convert.
#' @return `x` converted with `as.numeric()`.
n <- function(x) {
  as.numeric(x)
}

```


```{r, multi-level model Random Effects on Party Voted For Labour Conservatives}

# Round-level random effects of the multilevel model as a data frame.
foo <- adf(ranef(Multi_Level)$round)

# The row names carry the round number.
foo$round <- n(row.names(foo))

# Rounds are spaced 7 days apart; round 1 is mapped to 2019-11-22.
foo$date <- as.Date("2019-11-22") + (foo$round - 1) * 7

names(foo) <- c(
  "intercept", "Left Newspapers", "Right Newspapers",
  "Conservative", "Labour", "LibDem", "VotedOther", "SNP",
  "round", "date"
) # "Brexiteer",


#### Conservative vs. Labour Party Supporters

# Bounds: estimate -/+ 1.645 standard errors (column 5 = Labour,
# column 4 = Conservative, following the names assigned above).
foo <- local({
  estimate <- ranef(Multi_Level)$round
  std_err <- se.ranef(Multi_Level)$round
  out <- foo
  out$labour_lb <- estimate[, 5] - std_err[, 5] * 1.645
  out$labour_ub <- estimate[, 5] + std_err[, 5] * 1.645
  out$conservative_lb <- estimate[, 4] - std_err[, 4] * 1.645
  out$conservative_ub <- estimate[, 4] + std_err[, 4] * 1.645
  out
})

# +0.224

# Ask Roberto to explain added values when calculating rolling.

# Smoothed Conservative (blue) and Labour (red) effects with their ribbons.
plot_con_lab <- ggplot(
  subset(foo, date > as.Date("2020-03-01")),
  aes(x = as.Date(date), y = rolling(Conservative))
) +
  geom_ribbon(
    aes(ymin = rolling(conservative_lb), ymax = rolling(conservative_ub)),
    alpha = .05
  ) +
  geom_line(size = 1.5, color = "darkblue", alpha = 0.65) +
  xlab("") +
  geom_hline(yintercept = 0, alpha = 0.25, size = 1.5) +
  geom_ribbon(
    aes(ymin = rolling(labour_lb), ymax = rolling(labour_ub)),
    alpha = .05
  ) +
  geom_line(
    aes(y = rolling(Labour)), # +0.224
    size = 1.5, color = "darkred", alpha = 0.65
  ) +
  geom_vline(xintercept = as.Date("2020-04-01"), linetype = "dotted") +
  scale_y_continuous(
    breaks = c(-0.2, -0.1, 0, 0.1, 0.2),
    labels = c("-20%", "-10%", "0%", "+10%", "+20%")
  ) +
  coord_cartesian(
    ylim = c(-0.2, 0.25),
    xlim = c(as.Date("2020-01-01"), as.Date("2022-04-01"))
  ) +
  annotate(
    geom = "text", label = "Conservative", color = "darkblue",
    x = as.Date("2022-01-01"), hjust = 0,
    y = tail(rolling(foo$Conservative), 1)
  ) +
  annotate(
    geom = "text", label = "Labour", color = "darkred",
    x = as.Date("2022-01-01"), hjust = 0,
    y = tail(rolling(foo$Labour), 1)
  ) +
  ylab("Estimated Effect Upon Satisfaction with Democracy") +
  theme_article()

plot_con_lab

# for windows
# setwd("C:/Users/omarh/Documents/GitHub/Satisfaction_Democracy/data-viz")

# for mac
setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz/")

ggsave("LabourConservativesEffect.png", plot = plot_con_lab, width = 16, height = 9)




```


```{r, multi-level model Random Effects on Newspaper Readership individually}



# Left-leaning papers: row mean over a single column, i.e. a copy of it.
foo$leftpapers <- rowMeans(cbind(foo[["Left Newspapers"]]))

# -0.0734

leftpapers <- ggplot(
  subset(foo, date > as.Date("2020-03-01")),
  aes(x = as.Date(date), y = rolling(leftpapers))
) +
  geom_line() +
  xlab("") +
  geom_vline(xintercept = as.Date("2020-04-01"), linetype = "dotted") +
  coord_cartesian(ylim = c(-0.15, 0)) +
  theme_classic()

leftpapers

ggsave("leftpapers.png", plot = leftpapers)

# Right-leaning papers, built the same way.
foo$rightpapers <- rowMeans(cbind(foo[["Right Newspapers"]]))

# + 0.07

rightpapers <- ggplot(
  subset(foo, date > as.Date("2020-03-01")),
  aes(x = as.Date(date), y = rolling(rightpapers))
) +
  geom_line() +
  xlab("") +
  geom_vline(xintercept = as.Date("2020-04-01"), linetype = "dotted") +
  coord_cartesian(ylim = c(0, 0.1)) +
  theme_classic()

rightpapers


ggsave("rightpapers.png", plot = rightpapers)

```

```{r, multi-level model Random Effects on Newspaper Readership together}



## getting a proxy here for standard error but really you cannot just "add" them - something to fix later
## as re-estimating multilevel models for this could take a while

# Bounds: estimate -/+ 1.645 standard errors (column 2 = left-leaning papers,
# column 3 = right-leaning papers in the round-level random effects).
foo <- local({
  estimate <- ranef(Multi_Level)$round
  std_err <- se.ranef(Multi_Level)$round
  out <- foo
  out$leftpapers_lb <- estimate[, 2] - std_err[, 2] * 1.645
  out$leftpapers_ub <- estimate[, 2] + std_err[, 2] * 1.645
  out$rightpapers_lb <- estimate[, 3] - std_err[, 3] * 1.645
  out$rightpapers_ub <- estimate[, 3] + std_err[, 3] * 1.645
  out
})

# + 0.07

# Smoothed right-leaning (blue) and left-leaning (red) effects with ribbons.
Right_Left_Newspaper_Plot <- ggplot(
  subset(foo, date > as.Date("2020-02-28")),
  aes(x = as.Date(date), y = rolling(rightpapers))
) +
  geom_ribbon(
    aes(ymin = rolling(rightpapers_lb), ymax = rolling(rightpapers_ub)),
    alpha = .05
  ) +
  geom_line(size = 1.5, color = "darkblue", alpha = 0.65) +
  xlab("") +
  geom_hline(yintercept = 0, alpha = 0.25, size = 1.5) +
  geom_ribbon(
    aes(ymin = rolling(leftpapers_lb), ymax = rolling(leftpapers_ub)),
    alpha = .05
  ) +
  geom_line(
    aes(y = rolling(leftpapers)), # - 0.073
    size = 1.5, color = "darkred", alpha = 0.65
  ) +
  geom_vline(xintercept = as.Date("2020-04-01"), linetype = "dotted") +
  coord_cartesian(
    ylim = c(-0.12, 0.1),
    xlim = c(as.Date("2020-01-01"), as.Date("2022-05-01"))
  ) +
  scale_y_continuous(
    breaks = c(-0.2, -0.1, 0, 0.1, 0.2),
    labels = c("-20%", "-10%", "0%", "+10%", "+20%")
  ) +
  annotate(
    geom = "text", label = "Rightwing Papers", color = "darkblue",
    x = as.Date("2022-01-01"), hjust = 0,
    y = tail(rolling(foo$rightpapers), 1)
  ) +
  annotate(
    geom = "text", label = "Leftwing Papers", color = "darkred",
    x = as.Date("2022-01-01"), hjust = 0,
    y = tail(rolling(foo$leftpapers), 1)
  ) +
  ylab("Estimated Effect upon Satisfaction with Democracy") +
  theme_article()

Right_Left_Newspaper_Plot

# for windows
# setwd("C:/Users/omarh/Documents/GitHub/Satisfaction_Democracy/data-viz/")

# for mac
setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz/")

ggsave("LeftRightPapersEffect.png", plot = Right_Left_Newspaper_Plot, width = 16, height = 9)


```




# Regression Models

## Separate topic variables Logit Models

```{r, upload data for modelling}

# Project folder (Mac).
setwd("/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/")

# Omar Windows upload.
# setwd("C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project")

# Fresh copy of the merged survey/tweet data for the regression models.
Mod_Dataset <- read.csv(
  file = "combined_survey_tweets.csv",
  stringsAsFactors = TRUE
)


```

```{r, recode variables dataset}

# summary(Mod_Dataset$newspaper)

# Flag readers of the three left-leaning papers ("1") against everyone else
# ("0"), drop Financial Times readers and remove factor levels left empty.
Mod_Dataset <- Mod_Dataset %>%
  mutate(
    leftpaper = as.factor(
      ifelse(
        newspaper %in% c("guardian", "independent", "daily_mirror"),
        "1",
        "0"
      )
    )
  ) %>%
  filter(newspaper != "ft") %>%
  drop.levels()

# set reference levels
Mod_Dataset$lastvote <- relevel(
  Mod_Dataset$lastvote,
  ref = "Did not vote"
)
Mod_Dataset$profile_education_level <- relevel(
  Mod_Dataset$profile_education_level,
  ref = "No_education"
)

# Household income band -> band number ("old label" = "new code").
# The two non-answers are mapped to the string "NA", which becomes a missing
# value in the numeric conversion below.
Mod_Dataset$profile_gross_household <- recode_factor(
  Mod_Dataset$profile_gross_household,
  "under £5,000 per year"         = "1",
  "£5,000 to £9,999 per year"     = "2",
  "£10,000 to £14,999 per year"   = "3",
  "£15,000 to £19,999 per year"   = "4",
  "£20,000 to £24,999 per year"   = "5",
  "£25,000 to £29,999 per year"   = "6",
  "£30,000 to £34,999 per year"   = "7",
  "£35,000 to £39,999 per year"   = "8",
  "£40,000 to £44,999 per year"   = "9",
  "£45,000 to £49,999 per year"   = "10",
  "£50,000 to £59,999 per year"   = "11",
  "£60,000 to £69,999 per year"   = "12",
  "£70,000 to £99,999 per year"   = "13",
  "£100,000 to £149,999 per year" = "14",
  "£150,000 and over"             = "15",
  "Don't know"                    = "NA",
  "Prefer_not_to_answer"          = "NA"
)

Mod_Dataset$profile_gross_household <-
  as.numeric(as.character(Mod_Dataset$profile_gross_household))

# Normalise distribution of all retweets
# Mod_Dataset<- Mod_Dataset %>%
# mutate_at(c(6:197), funs(c(scale(.))))

# Survey design object carrying the respondent weights; used by svyglm().
Mod_Dataset_1 <- svydesign(ids = ~X, weights = ~weight, data = Mod_Dataset)

```

### Logit with Survey Weights - AvG

```{r, function to transform logit coef into probability, eval = FALSE}

#' Convert log-odds to probabilities
#'
#' Applies the logistic function via `plogis()`. Writing it out as
#' `exp(logit) / (1 + exp(logit))` returns NaN once `exp()` overflows to `Inf`.
#'
#' @param logit Numeric vector of log-odds (e.g. logit coefficients).
#' @return Numeric vector of probabilities, same length as `logit`.
logit2prob <- function(logit) {
  plogis(logit)
}


# Coefficients of avg_logit1 passed through logit2prob().
# NOTE(review): avg_logit1 is only fitted in chunks further down this file, so
# those have to be run before this (non-evaluated) chunk.
AV_2D<- logit2prob(coef(avg_logit1))



#coef(avg_logit1)


#install.packages("marginaleffects")
library("marginaleffects")

# Predictions from the fitted model on the response scale; show the first rows.
pred<- predictions(avg_logit1, type = "response")
head(pred)

# NOTE(review): confirm the argument names and the `type` value against the
# installed marginaleffects version before relying on this call.
plot_cap(avg_logit1, conditions = "retweet_2d_avg_Uk_Politics", type = "list")



```

```{r, survey package lead retweets, eval = FALSE}

glimpse(Mod_Dataset)

# Survey-weighted binomial GLM of DEM_SAT on the `lead_retweets_*` topic
# columns plus the respondent controls.
avg_logit1 <- svyglm(
  DEM_SAT ~ lead_retweets_Climate_Change + lead_retweets_Covid +
    lead_retweets_Crime + lead_retweets_Daily_News +
    lead_retweets_Enterteinment + lead_retweets_Puppy_News +
    lead_retweets_Royal_Family + lead_retweets_Sports +
    lead_retweets_Uk_Politics + lead_retweets_US_Politics +
    lead_retweets_Violence + lead_retweets_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)
summary(avg_logit1)

```

```{r, survey package logit retweet_2d_avg }

# The chunk label was changed from "survey package lead retweet_2d_avg": the
# probit chunk further down carries that same label, and knitr refuses to knit
# a document with duplicated chunk labels.

# Survey-weighted binomial GLM of DEM_SAT on the `retweet_2d_avg_*` topic
# columns plus the respondent controls.
avg_logit1 <- svyglm(
  DEM_SAT ~ retweet_2d_avg_Climate_Change + retweet_2d_avg_Covid +
    retweet_2d_avg_Crime + retweet_2d_avg_Daily_News +
    retweet_2d_avg_Enterteinment + retweet_2d_avg_Puppy_News +
    retweet_2d_avg_Royal_Family + retweet_2d_avg_Sports +
    retweet_2d_avg_Uk_Politics + retweet_2d_avg_US_Politics +
    retweet_2d_avg_Violence + retweet_2d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)
summary(avg_logit1)

stargazer(avg_logit1, type = "text")

```

```{r, survey package lead retweet_3d_avg, eval = FALSE}



# Survey-weighted binomial GLM of DEM_SAT on the `retweet_3d_avg_*` topic
# columns plus the respondent controls.
avg_logit2 <- svyglm(
  DEM_SAT ~ retweet_3d_avg_Climate_Change + retweet_3d_avg_Covid +
    retweet_3d_avg_Crime + retweet_3d_avg_Daily_News +
    retweet_3d_avg_Enterteinment + retweet_3d_avg_Puppy_News +
    retweet_3d_avg_Royal_Family + retweet_3d_avg_Sports +
    retweet_3d_avg_Uk_Politics + retweet_3d_avg_US_Politics +
    retweet_3d_avg_Violence + retweet_3d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)
summary(avg_logit2)

stargazer(avg_logit2, type = "text")
```

```{r, survey package lead retweet_7d_avg, eval = FALSE}



# Survey-weighted binomial GLM of DEM_SAT on the `retweet_7d_avg_*` topic
# columns plus the respondent controls.
avg_logit3 <- svyglm(
  DEM_SAT ~ retweet_7d_avg_Climate_Change + retweet_7d_avg_Covid +
    retweet_7d_avg_Crime + retweet_7d_avg_Daily_News +
    retweet_7d_avg_Enterteinment + retweet_7d_avg_Puppy_News +
    retweet_7d_avg_Royal_Family + retweet_7d_avg_Sports +
    retweet_7d_avg_Uk_Politics + retweet_7d_avg_US_Politics +
    retweet_7d_avg_Violence + retweet_7d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)
summary(avg_logit3)

stargazer(avg_logit3, type = "text")
```

```{r, survey package lead retweet_10d_avg }



# Survey-weighted binomial GLM of DEM_SAT on the `retweet_10d_avg_*` topic
# columns plus the respondent controls.
avg_logit4 <- svyglm(
  DEM_SAT ~ retweet_10d_avg_Climate_Change + retweet_10d_avg_Covid +
    retweet_10d_avg_Crime + retweet_10d_avg_Daily_News +
    retweet_10d_avg_Enterteinment + retweet_10d_avg_Puppy_News +
    retweet_10d_avg_Royal_Family + retweet_10d_avg_Sports +
    retweet_10d_avg_Uk_Politics + retweet_10d_avg_US_Politics +
    retweet_10d_avg_Violence + retweet_10d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)

summary(avg_logit4)

stargazer(avg_logit4, type = "text")


```

```{r, survey package lead retweet_15d_avg }

# glimpse(Mod_Dataset)

# Survey-weighted binomial GLM of DEM_SAT on the `retweet_15d_avg_*` topic
# columns plus the respondent controls.
avg_logit5 <- svyglm(
  DEM_SAT ~ retweet_15d_avg_Climate_Change + retweet_15d_avg_Covid +
    retweet_15d_avg_Crime + retweet_15d_avg_Daily_News +
    retweet_15d_avg_Enterteinment + retweet_15d_avg_Puppy_News +
    retweet_15d_avg_Royal_Family + retweet_15d_avg_Sports +
    retweet_15d_avg_Uk_Politics + retweet_15d_avg_US_Politics +
    retweet_15d_avg_Violence + retweet_15d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)

summary(avg_logit5)

stargazer(avg_logit5, type = "text")


```

```{r, survey package lead retweet_20d_avg }



# Survey-weighted binomial GLM of DEM_SAT on the `retweet_20d_avg_*` topic
# columns plus the respondent controls.
avg_logit6 <- svyglm(
  DEM_SAT ~ retweet_20d_avg_Climate_Change + retweet_20d_avg_Covid +
    retweet_20d_avg_Crime + retweet_20d_avg_Daily_News +
    retweet_20d_avg_Enterteinment + retweet_20d_avg_Puppy_News +
    retweet_20d_avg_Royal_Family + retweet_20d_avg_Sports +
    retweet_20d_avg_Uk_Politics + retweet_20d_avg_US_Politics +
    retweet_20d_avg_Violence + retweet_20d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)

summary(avg_logit6)

stargazer(avg_logit6, type = "text")



```

```{r, survey package lead retweet_25d_avg }



# Survey-weighted binomial GLM of DEM_SAT on the `retweet_25d_avg_*` topic
# columns plus the respondent controls.
avg_logit7 <- svyglm(
  DEM_SAT ~ retweet_25d_avg_Climate_Change + retweet_25d_avg_Covid +
    retweet_25d_avg_Crime + retweet_25d_avg_Daily_News +
    retweet_25d_avg_Enterteinment + retweet_25d_avg_Puppy_News +
    retweet_25d_avg_Royal_Family + retweet_25d_avg_Sports +
    retweet_25d_avg_Uk_Politics + retweet_25d_avg_US_Politics +
    retweet_25d_avg_Violence + retweet_25d_avg_Weather +
    profile_gross_household + lastvote + pastvote_EURef + age +
    profile_gender + profile_education_level + profile_GOR + leftpaper +
    political_attention + created_at,
  design = Mod_Dataset_1,
  family = binomial
)

# summary(avg_logit7)

stargazer(avg_logit7, type = "text")

```

```{r, survey package lead retweet_30d_avg, eval = FALSE}



# Survey-design logit (binomial family, default link) of DEM_SAT on the
# retweet_30d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
avg_logit8 <- svyglm(
  DEM_SAT ~ retweet_30d_avg_Climate_Change +
    retweet_30d_avg_Covid +
    retweet_30d_avg_Crime +
    retweet_30d_avg_Daily_News +
    retweet_30d_avg_Enterteinment +
    retweet_30d_avg_Puppy_News +
    retweet_30d_avg_Royal_Family +
    retweet_30d_avg_Sports +
    retweet_30d_avg_Uk_Politics +
    retweet_30d_avg_US_Politics +
    retweet_30d_avg_Violence +
    retweet_30d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial,
  design = Mod_Dataset_1
)

# Coefficient table and fit summary.
summary(avg_logit8)

# stargazer(avg_logit8, type = "text")

```

# Probit with Survey Weights - Avg. (family = binomial(link = "probit"))

```{r, probit survey package lead retweet_2d_avg}


# Survey-design probit (binomial family, probit link) of DEM_SAT on the
# retweet_2d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
# NOTE(review): despite the avg_logit* name, this object holds a probit fit.
avg_logit1 <- svyglm(
  DEM_SAT ~ retweet_2d_avg_Climate_Change +
    retweet_2d_avg_Covid +
    retweet_2d_avg_Crime +
    retweet_2d_avg_Daily_News +
    retweet_2d_avg_Enterteinment +
    retweet_2d_avg_Puppy_News +
    retweet_2d_avg_Royal_Family +
    retweet_2d_avg_Sports +
    retweet_2d_avg_Uk_Politics +
    retweet_2d_avg_US_Politics +
    retweet_2d_avg_Violence +
    retweet_2d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial(link = "probit"),
  design = Mod_Dataset_1
)

# summary(avg_logit1)

# Print the fitted model as a plain-text regression table.
stargazer(avg_logit1, type = "text")

```

```{r, probit survey package lead retweet_10d_avg}



# Survey-design probit (binomial family, probit link) of DEM_SAT on the
# retweet_10d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
# NOTE(review): despite the avg_logit* name, this object holds a probit fit.
avg_logit4 <- svyglm(
  DEM_SAT ~ retweet_10d_avg_Climate_Change +
    retweet_10d_avg_Covid +
    retweet_10d_avg_Crime +
    retweet_10d_avg_Daily_News +
    retweet_10d_avg_Enterteinment +
    retweet_10d_avg_Puppy_News +
    retweet_10d_avg_Royal_Family +
    retweet_10d_avg_Sports +
    retweet_10d_avg_Uk_Politics +
    retweet_10d_avg_US_Politics +
    retweet_10d_avg_Violence +
    retweet_10d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial(link = "probit"),
  design = Mod_Dataset_1
)

# summary(avg_logit4)

# Print the fitted model as a plain-text regression table.
stargazer(avg_logit4, type = "text")


```

```{r, probit survey package lead retweet_15d_avg}

# glimpse(Mod_Dataset)

# Survey-design probit (binomial family, probit link) of DEM_SAT on the
# retweet_15d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
# NOTE(review): despite the avg_logit* name, this object holds a probit fit.
avg_logit5 <- svyglm(
  DEM_SAT ~ retweet_15d_avg_Climate_Change +
    retweet_15d_avg_Covid +
    retweet_15d_avg_Crime +
    retweet_15d_avg_Daily_News +
    retweet_15d_avg_Enterteinment +
    retweet_15d_avg_Puppy_News +
    retweet_15d_avg_Royal_Family +
    retweet_15d_avg_Sports +
    retweet_15d_avg_Uk_Politics +
    retweet_15d_avg_US_Politics +
    retweet_15d_avg_Violence +
    retweet_15d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial(link = "probit"),
  design = Mod_Dataset_1
)

# summary(avg_logit5)

# Print the fitted model as a plain-text regression table.
stargazer(avg_logit5, type = "text")


```

```{r, probit survey package lead retweet_20d_avg}



# Survey-design probit (binomial family, probit link) of DEM_SAT on the
# retweet_20d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
# NOTE(review): despite the avg_logit* name, this object holds a probit fit.
avg_logit6 <- svyglm(
  DEM_SAT ~ retweet_20d_avg_Climate_Change +
    retweet_20d_avg_Covid +
    retweet_20d_avg_Crime +
    retweet_20d_avg_Daily_News +
    retweet_20d_avg_Enterteinment +
    retweet_20d_avg_Puppy_News +
    retweet_20d_avg_Royal_Family +
    retweet_20d_avg_Sports +
    retweet_20d_avg_Uk_Politics +
    retweet_20d_avg_US_Politics +
    retweet_20d_avg_Violence +
    retweet_20d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial(link = "probit"),
  design = Mod_Dataset_1
)

# summary(avg_logit6)

# Print the fitted model as a plain-text regression table.
stargazer(avg_logit6, type = "text")



```

```{r, probit survey package lead retweet_25d_avg}



# Survey-design probit (binomial family, probit link) of DEM_SAT on the
# retweet_25d_avg_* topic measures plus the respondent controls and
# created_at, fitted on the Mod_Dataset_1 design object.
# NOTE(review): this assignment replaces the logit fit stored under the same
# avg_logit7 name earlier in the document.
avg_logit7 <- svyglm(
  DEM_SAT ~ retweet_25d_avg_Climate_Change +
    retweet_25d_avg_Covid +
    retweet_25d_avg_Crime +
    retweet_25d_avg_Daily_News +
    retweet_25d_avg_Enterteinment +
    retweet_25d_avg_Puppy_News +
    retweet_25d_avg_Royal_Family +
    retweet_25d_avg_Sports +
    retweet_25d_avg_Uk_Politics +
    retweet_25d_avg_US_Politics +
    retweet_25d_avg_Violence +
    retweet_25d_avg_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  family = binomial(link = "probit"),
  design = Mod_Dataset_1
)

# summary(avg_logit7)

# Print the fitted model as a plain-text regression table.
stargazer(avg_logit7, type = "text")

```



```{r, results on stargazer, results = 'asis', eval = FALSE}

# LaTeX table for avg_logit8. Rows whose names match any `omit` pattern
# (intercept and control terms) are hidden and summarised by the add.lines
# rows instead.
stargazer(
  avg_logit8,
  title = "Logit Regression",
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes"),
    c("Weights", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "",
  type = "latex",
  column.sep.width = "-45pt"
)


```

### Logit with Survey Weights - Retweets

```{r, logit with lead variables retweets, eval= FALSE}
# Logit regression on the 1-day lead retweet measures.
# This is a plain glm() on Mod_Dataset (no survey design object); the
# Daily_News, Enterteinment, Puppy_News and Weather terms are commented out.
logit_retweets_lead <- glm(
  DEM_SAT ~ lead_retweets_Climate_Change +
    lead_retweets_Covid +
    lead_retweets_Crime +
    # lead_retweets_Daily_News +
    # lead_retweets_Enterteinment +
    # lead_retweets_Puppy_News +
    lead_retweets_Royal_Family +
    lead_retweets_Sports +
    lead_retweets_Uk_Politics +
    lead_retweets_US_Politics +
    lead_retweets_Violence +
    # lead_retweets_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_lead)

```

```{r, logit with lead by 3 day variables retweets}

# Logit regression on the 3-day lead retweet measures (all twelve topics),
# fitted with svyglm() on the Mod_Dataset_1 design object.
logit_retweets_3_lead <- svyglm(
  DEM_SAT ~ lead_retweets_3_Climate_Change +
    lead_retweets_3_Covid +
    lead_retweets_3_Crime +
    lead_retweets_3_Daily_News +
    lead_retweets_3_Enterteinment +
    lead_retweets_3_Puppy_News +
    lead_retweets_3_Royal_Family +
    lead_retweets_3_Sports +
    lead_retweets_3_Uk_Politics +
    lead_retweets_3_US_Politics +
    lead_retweets_3_Violence +
    lead_retweets_3_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  design = Mod_Dataset_1,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_3_lead)

# This undoes the log-odds (logit) transform:
# exp(coef(logit_retweets_3_lead)) / (1+exp(coef(logit_retweets_3_lead)))

# install.packages(ggeffects)
# library(ggeffects)
# ggeffect(logit_retweets_3_lead)  %>%
#   plot()
  


```

```{r, logit with lead by 5 day variables retweets}

# Logit regression on the 5-day lead retweet measures (all twelve topics),
# fitted with svyglm() on the Mod_Dataset_1 design object.
logit_retweets_5_lead <- svyglm(
  DEM_SAT ~ lead_retweets_5_Climate_Change +
    lead_retweets_5_Covid +
    lead_retweets_5_Crime +
    lead_retweets_5_Daily_News +
    lead_retweets_5_Enterteinment +
    lead_retweets_5_Puppy_News +
    lead_retweets_5_Royal_Family +
    lead_retweets_5_Sports +
    lead_retweets_5_Uk_Politics +
    lead_retweets_5_US_Politics +
    lead_retweets_5_Violence +
    lead_retweets_5_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  design = Mod_Dataset_1,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_5_lead)



```

```{r, logit with lead by 7 day variables retweets}

# Logit regression on the 7-day lead retweet measures (all twelve topics),
# fitted with svyglm() on the Mod_Dataset_1 design object.
logit_retweets_7_lead <- svyglm(
  DEM_SAT ~ lead_retweets_7_Climate_Change +
    lead_retweets_7_Covid +
    lead_retweets_7_Crime +
    lead_retweets_7_Daily_News +
    lead_retweets_7_Enterteinment +
    lead_retweets_7_Puppy_News +
    lead_retweets_7_Royal_Family +
    lead_retweets_7_Sports +
    lead_retweets_7_Uk_Politics +
    lead_retweets_7_US_Politics +
    lead_retweets_7_Violence +
    lead_retweets_7_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  design = Mod_Dataset_1,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_7_lead)

```

```{r, logit with lead by 10 day variables retweets}

# Logit regression on the 10-day lead retweet measures (all twelve topics),
# fitted with svyglm() on the Mod_Dataset_1 design object.
logit_retweets_10_lead <- svyglm(
  DEM_SAT ~ lead_retweets_10_Climate_Change +
    lead_retweets_10_Covid +
    lead_retweets_10_Crime +
    lead_retweets_10_Daily_News +
    lead_retweets_10_Enterteinment +
    lead_retweets_10_Puppy_News +
    lead_retweets_10_Royal_Family +
    lead_retweets_10_Sports +
    lead_retweets_10_Uk_Politics +
    lead_retweets_10_US_Politics +
    lead_retweets_10_Violence +
    lead_retweets_10_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  design = Mod_Dataset_1,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_10_lead)



```


```{r, stargazer new}
# Plain-text table for the 10-day lead model. Rows whose names match any
# `omit` pattern (intercept and control terms) are hidden and summarised by
# the add.lines rows instead.
stargazer(
  logit_retweets_10_lead,
  title = "Logit Regression",
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes"),
    c("Weights FE", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  # notes = "Retweets are all standardised from 0 to 1.
  # \nCoefficients should be interpreted accordingly."
  type = "text",
  column.sep.width = "-45pt"
)
```



```{r, table leads results, results='asis'}
# LaTeX regression tables for the lead-retweet logit models.
#
# stargazer applies covariate.labels by position to the rows left after
# `omit`, so every label vector below lists exactly one label per displayed
# topic coefficient, in the order the terms appear in the model formula.
library(stargazer)

# NOTE(review): logit_retweets_lead is fitted in a chunk marked eval = FALSE,
# so it must already exist in the session for this call to succeed - confirm.
# That model keeps eight topic terms, which matches the eight labels here.
stargazer(
  logit_retweets_lead,
  title = "Logit Regression",
  covariate.labels = c(
    "1 Day Lead\nClimate Change RT",
    "1 Day Lead\nCovid RT",
    "1 Day Lead\nCrime RT",
    "1 Day Lead\nRoyal Family\nRT",
    "1 Day Lead\nSport RT",
    "1 Day Lead\nUK Politics\nRT",
    "1 Day Lead\nUS Politics\nRT",
    "1 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.
          \nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

# The 3-, 5-, 7- and 10-day lead models keep all twelve topic terms
# (including Daily News, Enterteinment, Puppy News and Weather), so they need
# twelve labels. With only eight, the Daily News row would be printed as
# "Royal Family", Enterteinment as "Sport", and so on.
stargazer(
  logit_retweets_3_lead,
  title = "Logit Regression II",
  covariate.labels = c(
    "3 Day Lead\nClimate Change RT",
    "3 Day Lead\nCovid RT",
    "3 Day Lead\nCrime RT",
    "3 Day Lead\nDaily News RT",
    "3 Day Lead\nEntertainment RT",
    "3 Day Lead\nPuppy News RT",
    "3 Day Lead\nRoyal Family\nRT",
    "3 Day Lead\nSport RT",
    "3 Day Lead\nUK Politics\nRT",
    "3 Day Lead\nUS Politics\nRT",
    "3 Day Lead\nViolence RT",
    "3 Day Lead\nWeather RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_5_lead,
  title = "Logit Regression III",
  covariate.labels = c(
    "5 Day Lead\nClimate Change RT",
    "5 Day Lead\nCovid RT",
    "5 Day Lead\nCrime RT",
    "5 Day Lead\nDaily News RT",
    "5 Day Lead\nEntertainment RT",
    "5 Day Lead\nPuppy News RT",
    "5 Day Lead\nRoyal Family\nRT",
    "5 Day Lead\nSport RT",
    "5 Day Lead\nUK Politics\nRT",
    "5 Day Lead\nUS Politics\nRT",
    "5 Day Lead\nViolence RT",
    "5 Day Lead\nWeather RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_7_lead,
  title = "Logit Regression IV",
  covariate.labels = c(
    "7 Day Lead\nClimate Change RT",
    "7 Day Lead\nCovid RT",
    "7 Day Lead\nCrime RT",
    "7 Day Lead\nDaily News RT",
    "7 Day Lead\nEntertainment RT",
    "7 Day Lead\nPuppy News RT",
    "7 Day Lead\nRoyal Family\nRT",
    "7 Day Lead\nSport RT",
    "7 Day Lead\nUK Politics\nRT",
    "7 Day Lead\nUS Politics\nRT",
    "7 Day Lead\nViolence RT",
    "7 Day Lead\nWeather RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_10_lead,
  title = "Logit Regression V",
  covariate.labels = c(
    "10 Day Lead\nClimate Change RT",
    "10 Day Lead\nCovid RT",
    "10 Day Lead\nCrime RT",
    "10 Day Lead\nDaily News RT",
    "10 Day Lead\nEntertainment RT",
    "10 Day Lead\nPuppy News RT",
    "10 Day Lead\nRoyal Family\nRT",
    "10 Day Lead\nSport RT",
    "10 Day Lead\nUK Politics\nRT",
    "10 Day Lead\nUS Politics\nRT",
    "10 Day Lead\nViolence RT",
    "10 Day Lead\nWeather RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)
```





## Separate topic variables Logit Models: BBC ONLY (everything statistically significant; check again)

```{r, upload data for modelling bbc}

# Load combined_survey_tweets.csv into Mod_Dataset, reading strings as
# factors. The file is addressed by its full path rather than via setwd(),
# so this chunk no longer changes the session's working directory.
#
# Omar Windows upload: use this directory instead.
# "C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project"
Mod_Dataset <- read.csv(
  file.path(
    "/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project",
    "combined_survey_tweets.csv"
  ),
  stringsAsFactors = TRUE
)



```

```{r, recode variables dataset bbc}

# Prepare Mod_Dataset for the BBC-only models.

# Keep only rows whose newspaper is "bbc_news".
Mod_Dataset <- Mod_Dataset %>%
  filter(newspaper == "bbc_news")

# Set reference levels for the factor controls.
Mod_Dataset$lastvote <- relevel(Mod_Dataset$lastvote, ref = "Did not vote")
Mod_Dataset$profile_education_level <- relevel(
  Mod_Dataset$profile_education_level,
  ref = "No_education"
)

# Map the household income bands onto the codes "1".."15"; the two
# non-response categories are mapped to the string "NA".
Mod_Dataset$profile_gross_household <- recode_factor(
  Mod_Dataset$profile_gross_household,
  "under £5,000 per year" = "1",
  "£5,000 to £9,999 per year" = "2",
  "£10,000 to £14,999 per year" = "3",
  "£15,000 to £19,999 per year" = "4",
  "£20,000 to £24,999 per year" = "5",
  "£25,000 to £29,999 per year" = "6",
  "£30,000 to £34,999 per year" = "7",
  "£35,000 to £39,999 per year" = "8",
  "£40,000 to £44,999 per year" = "9",
  "£45,000 to £49,999 per year" = "10",
  "£50,000 to £59,999 per year" = "11",
  "£60,000 to £69,999 per year" = "12",
  "£70,000 to £99,999 per year" = "13",
  "£100,000 to £149,999 per year" = "14",
  "£150,000 and over" = "15",
  "Don't know" = "NA",
  "Prefer_not_to_answer" = "NA"
)

# Convert the codes to numbers; the "NA" strings become missing values
# through numeric coercion.
Mod_Dataset$profile_gross_household <- as.numeric(
  as.character(Mod_Dataset$profile_gross_household)
)

# Standardise columns 6 to 197 with scale() (centre and divide by the
# standard deviation). c() drops the matrix attributes scale() returns so
# each column stays a plain numeric vector. across() replaces the deprecated
# mutate_at() + funs() combination.
# NOTE(review): columns are selected by position; presumably 6:197 are the
# retweet measures - confirm against the CSV layout.
Mod_Dataset <- Mod_Dataset %>%
  mutate(across(6:197, ~ c(scale(.x))))


```



```{r, logit with lead variables retweets bbc}
# Logit regression (plain glm on Mod_Dataset) on the 1-day lead retweet
# measures. The Climate_Change, Daily_News, Enterteinment, Puppy_News and
# Weather terms are commented out, and there is no leftpaper control.
logit_retweets_lead <- glm(
  DEM_SAT ~ # lead_retweets_Climate_Change +
    lead_retweets_Covid +
    lead_retweets_Crime +
    # lead_retweets_Daily_News +
    # lead_retweets_Enterteinment +
    # lead_retweets_Puppy_News +
    lead_retweets_Royal_Family +
    lead_retweets_Sports +
    lead_retweets_Uk_Politics +
    lead_retweets_US_Politics +
    lead_retweets_Violence +
    # lead_retweets_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_lead)

```

```{r, logit with lead by 3 day variables retweets bbc}

# Logit regression (plain glm on Mod_Dataset) on the 3-day lead retweet
# measures. The Climate_Change, Daily_News, Enterteinment, Puppy_News and
# Weather terms are commented out, and there is no leftpaper control.
logit_retweets_3_lead <- glm(
  DEM_SAT ~ # lead_retweets_3_Climate_Change +
    lead_retweets_3_Covid +
    lead_retweets_3_Crime +
    # lead_retweets_3_Daily_News +
    # lead_retweets_3_Enterteinment +
    # lead_retweets_3_Puppy_News +
    lead_retweets_3_Royal_Family +
    lead_retweets_3_Sports +
    lead_retweets_3_Uk_Politics +
    lead_retweets_3_US_Politics +
    lead_retweets_3_Violence +
    # lead_retweets_3_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_3_lead)



```

```{r, logit with lead by 5 day variables retweets bbc}

# Logit regression (plain glm on Mod_Dataset) on the 5-day lead retweet
# measures. The Climate_Change, Daily_News, Enterteinment, Puppy_News and
# Weather terms are commented out, and there is no leftpaper control.
logit_retweets_5_lead <- glm(
  DEM_SAT ~ # lead_retweets_5_Climate_Change +
    lead_retweets_5_Covid +
    lead_retweets_5_Crime +
    # lead_retweets_5_Daily_News +
    # lead_retweets_5_Enterteinment +
    # lead_retweets_5_Puppy_News +
    lead_retweets_5_Royal_Family +
    lead_retweets_5_Sports +
    lead_retweets_5_Uk_Politics +
    lead_retweets_5_US_Politics +
    lead_retweets_5_Violence +
    # lead_retweets_5_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_5_lead)



```

```{r, logit with lead by 7 day variables retweets bbc}

# Logit regression (plain glm on Mod_Dataset) on the 7-day lead retweet
# measures. The Climate_Change, Daily_News, Enterteinment, Puppy_News and
# Weather terms are commented out, and there is no leftpaper control.
logit_retweets_7_lead <- glm(
  DEM_SAT ~ # lead_retweets_7_Climate_Change +
    lead_retweets_7_Covid +
    lead_retweets_7_Crime +
    # lead_retweets_7_Daily_News +
    # lead_retweets_7_Enterteinment +
    # lead_retweets_7_Puppy_News +
    lead_retweets_7_Royal_Family +
    lead_retweets_7_Sports +
    lead_retweets_7_Uk_Politics +
    lead_retweets_7_US_Politics +
    lead_retweets_7_Violence +
    # lead_retweets_7_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_7_lead)

```

```{r, logit with lead by 10 day variables retweets bbc}

# Logit regression (plain glm on Mod_Dataset) on the 10-day lead retweet
# measures. The Climate_Change, Daily_News, Enterteinment, Puppy_News and
# Weather terms are commented out, and there is no leftpaper control.
logit_retweets_10_lead <- glm(
  DEM_SAT ~ # lead_retweets_10_Climate_Change +
    lead_retweets_10_Covid +
    lead_retweets_10_Crime +
    # lead_retweets_10_Daily_News +
    # lead_retweets_10_Enterteinment +
    # lead_retweets_10_Puppy_News +
    lead_retweets_10_Royal_Family +
    lead_retweets_10_Sports +
    lead_retweets_10_Uk_Politics +
    lead_retweets_10_US_Politics +
    lead_retweets_10_Violence +
    # lead_retweets_10_Weather +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# change name topics
# Week as fixed effect
summary(logit_retweets_10_lead)



```


```{r, table leads results bbc, results='asis'}

# LaTeX regression tables for the five BBC-only lead models. Each model keeps
# seven topic terms, and each call supplies seven covariate labels in the
# same order; intercept and control rows are hidden through `omit`.
stargazer(
  logit_retweets_lead,
  title = "Logit Regression",
  covariate.labels = c(
    "1 Day Lead\nCovid RT",
    "1 Day Lead\nCrime RT",
    "1 Day Lead\nRoyal Family\nRT",
    "1 Day Lead\nSport RT",
    "1 Day Lead\nUK Politics\nRT",
    "1 Day Lead\nUS Politics\nRT",
    "1 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.
          \nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_3_lead,
  title = "Logit Regression II",
  covariate.labels = c(
    "3 Day Lead\nCovid RT",
    "3 Day Lead\nCrime RT",
    "3 Day Lead\nRoyal Family\nRT",
    "3 Day Lead\nSport RT",
    "3 Day Lead\nUK Politics\nRT",
    "3 Day Lead\nUS Politics\nRT",
    "3 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_5_lead,
  title = "Logit Regression III",
  covariate.labels = c(
    "5 Day Lead\nCovid RT",
    "5 Day Lead\nCrime RT",
    "5 Day Lead\nRoyal Family\nRT",
    "5 Day Lead\nSport RT",
    "5 Day Lead\nUK Politics\nRT",
    "5 Day Lead\nUS Politics\nRT",
    "5 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_7_lead,
  title = "Logit Regression IV",
  covariate.labels = c(
    "7 Day Lead\nCovid RT",
    "7 Day Lead\nCrime RT",
    "7 Day Lead\nRoyal Family\nRT",
    "7 Day Lead\nSport RT",
    "7 Day Lead\nUK Politics\nRT",
    "7 Day Lead\nUS Politics\nRT",
    "7 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

stargazer(
  logit_retweets_10_lead,
  title = "Logit Regression V",
  covariate.labels = c(
    "10 Day Lead\nCovid RT",
    "10 Day Lead\nCrime RT",
    "10 Day Lead\nRoyal Family\nRT",
    "10 Day Lead\nSport RT",
    "10 Day Lead\nUK Politics\nRT",
    "10 Day Lead\nUS Politics\nRT",
    "10 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(
    c("Week FE", "Yes"),
    c("Controls", "Yes")
  ),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)
```




## Separate topic variables Logit Models LAGS for robustness checks

```{r, upload data for modelling lags}

# Load combined_survey_tweets.csv into Mod_Dataset, reading strings as
# factors. The file is addressed by its full path rather than via setwd(),
# so this chunk no longer changes the session's working directory.
#
# Omar Windows upload: use this directory instead.
# "C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project"
Mod_Dataset <- read.csv(
  file.path(
    "/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project",
    "combined_survey_tweets.csv"
  ),
  stringsAsFactors = TRUE
)

# glimpse(Mod_Dataset)

```

```{r, recode variables dataset  lags}

# Recode the modelling data set before the lag models are fitted:
#   1. flag readers of guardian / independent / daily_mirror (`leftpaper`,
#      a factor with values "0" and "1");
#   2. drop rows whose newspaper is "bbc_news";
#   3. set the reference levels of `lastvote` and `profile_education_level`;
#   4. turn the household-income bands into a numeric 1-15 scale;
#   5. standardise columns 6 to 197.
Mod_Dataset <- Mod_Dataset %>%
  mutate(
    leftpaper = as.factor(
      ifelse(newspaper %in% c("guardian", "independent", "daily_mirror"), "1", "0")
    )
  ) %>%
  filter(newspaper != "bbc_news")

# set reference levels
Mod_Dataset$lastvote <- relevel(Mod_Dataset$lastvote, ref = "Did not vote")
Mod_Dataset$profile_education_level <- relevel(
  Mod_Dataset$profile_education_level,
  ref = "No_education"
)

# Income bands -> codes "1" to "15". The two non-response answers are mapped
# to the string "NA", which as.numeric() below converts to a missing value.
Mod_Dataset$profile_gross_household <- recode_factor(
  Mod_Dataset$profile_gross_household,
  "under £5,000 per year" = "1",
  "£5,000 to £9,999 per year" = "2",
  "£10,000 to £14,999 per year" = "3",
  "£15,000 to £19,999 per year" = "4",
  "£20,000 to £24,999 per year" = "5",
  "£25,000 to £29,999 per year" = "6",
  "£30,000 to £34,999 per year" = "7",
  "£35,000 to £39,999 per year" = "8",
  "£40,000 to £44,999 per year" = "9",
  "£45,000 to £49,999 per year" = "10",
  "£50,000 to £59,999 per year" = "11",
  "£60,000 to £69,999 per year" = "12",
  "£70,000 to £99,999 per year" = "13",
  "£100,000 to £149,999 per year" = "14",
  "£150,000 and over" = "15",
  "Don't know" = "NA",
  "Prefer_not_to_answer" = "NA"
)

Mod_Dataset$profile_gross_household <-
  as.numeric(as.character(Mod_Dataset$profile_gross_household))

# Standardise columns 6 to 197: scale() centres each column and divides it by
# its standard deviation; c() flattens the one-column matrix it returns back
# into a plain vector. A plain function is used because the funs() wrapper is
# deprecated in dplyr.
# NOTE(review): the columns are selected by position - presumably these are
# the retweet variables; confirm the range still matches the CSV layout.
# NOTE(review): the regression-table notes describe the retweets as
# "standardised from 0 to 1", whereas scale() yields z-scores - confirm wording.
Mod_Dataset <- Mod_Dataset %>%
  mutate_at(6:197, function(x) c(scale(x)))


```


```{r, logit with  lags by 7 day variables retweets}



# Logit regression 
# 7-day lag specification: binomial GLM of DEM_SAT on the 7-day lagged retweet
# measures for seven topics, the respondent-level controls and created_at
# (per the original note, the week fixed effect - confirm).
# The Climate_Change, Daily_News, Enterteinment, Puppy_News and Weather lags
# are not part of this specification.
logit_retweets_7_lag <- glm(
  DEM_SAT ~
    lag_retweets_7_Covid +
    lag_retweets_7_Crime +
    lag_retweets_7_Royal_Family +
    lag_retweets_7_Sports +
    lag_retweets_7_Uk_Politics +
    lag_retweets_7_US_Politics +
    lag_retweets_7_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# Print the coefficient table of the fitted model.
summary(logit_retweets_7_lag)

```

```{r, logit with  lags by 14 day variables retweets}

# 14-day lag specification (the chunk label now matches the variables used):
# binomial GLM of DEM_SAT on the 14-day lagged retweet measures for seven
# topics, the respondent-level controls and created_at (per the original
# note, the week fixed effect - confirm).
# The Climate_Change, Daily_News, Enterteinment, Puppy_News and Weather lags
# are not part of this specification.
logit_retweets_14_lag <- glm(
  DEM_SAT ~
    lag_retweets_14_Covid +
    lag_retweets_14_Crime +
    lag_retweets_14_Royal_Family +
    lag_retweets_14_Sports +
    lag_retweets_14_Uk_Politics +
    lag_retweets_14_US_Politics +
    lag_retweets_14_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = "binomial"
)

# Print the coefficient table of the fitted model.
summary(logit_retweets_14_lag)

```


```{r, table lag results, results='asis'}



# LaTeX tables for the two lag robustness models fitted in the preceding
# chunks (`logit_retweets_7_lag`, `logit_retweets_14_lag`). The tables
# previously referenced the `_lead` objects and labelled the rows "Lead".
# Terms matching the `omit` patterns are hidden and summarised by the
# "Week FE" / "Controls" rows.
stargazer(logit_retweets_7_lag, title = "Logit Regression I - Lags",
          covariate.labels = c("7 Day Lag\nCovid RT","7 Day Lag\nCrime RT",
                               "7 Day Lag\nRoyal Family\nRT",
                               "7 Day Lag\nSport RT", "7 Day Lag\nUK Politics\nRT",
                               "7 Day Lag\nUS Politics\nRT",
                               "7 Day Lag\nViolence RT"),
          omit= c("Constant", "age", "profile", "created", "lastvoteOther",
                  "pastvote","lastvote","political","leftpaper"),
          digits = 2, align= TRUE, add.lines = list(c("Week FE", "Yes"),
                                                    c("Controls","Yes")),
          dep.var.labels = c("Satisfaction with Democracy"), font.size = "small",
          header=FALSE,
          notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.", type = "latex", column.sep.width = "-45pt")




stargazer(logit_retweets_14_lag, title = "Logit Regression V - Lags",
          covariate.labels = c("14 Day Lag\nCovid RT","14 Day Lag\nCrime RT",
                               "14 Day Lag\nRoyal Family\nRT",
                               "14 Day Lag\nSport RT", "14 Day Lag\nUK Politics\nRT",
                               "14 Day Lag\nUS Politics\nRT",
                               "14 Day Lag\nViolence RT"),
          omit= c("Constant", "age", "profile", "created", "lastvoteOther",
                  "pastvote","lastvote","political","leftpaper"),
          digits = 2, align= TRUE, add.lines = list(c("Week FE", "Yes"),
                                                    c("Controls","Yes")),
          dep.var.labels = c("Satisfaction with Democracy"), font.size = "small",
          header=FALSE,
          notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.", type = "latex", column.sep.width = "-45pt")
```











# Appendix Probit Regression models  


```{r, upload data for modelling probit}

# Point the session at the shared project folder (macOS path). On Omar's
# Windows machine, use the commented-out path instead:
# setwd("C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project")
setwd(
  "/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/"
)

# Re-read combined_survey_tweets.csv for the probit appendix; text columns
# become factors.
Mod_Dataset <- read.csv(
  file = "combined_survey_tweets.csv",
  stringsAsFactors = TRUE
)



```

```{r, recode variables dataset probit}

# Recode the modelling data set before the probit models are fitted:
#   1. flag readers of guardian / independent / daily_mirror (`leftpaper`,
#      a factor with values "0" and "1");
#   2. drop rows whose newspaper is "bbc_news";
#   3. set the reference levels of `lastvote` and `profile_education_level`;
#   4. turn the household-income bands into a numeric 1-15 scale;
#   5. standardise columns 6 to 197.
Mod_Dataset <- Mod_Dataset %>%
  mutate(
    leftpaper = as.factor(
      ifelse(newspaper %in% c("guardian", "independent", "daily_mirror"), "1", "0")
    )
  ) %>%
  filter(newspaper != "bbc_news")

# set reference levels
Mod_Dataset$lastvote <- relevel(Mod_Dataset$lastvote, ref = "Did not vote")
Mod_Dataset$profile_education_level <- relevel(
  Mod_Dataset$profile_education_level,
  ref = "No_education"
)

# Income bands -> codes "1" to "15". The two non-response answers are mapped
# to the string "NA", which as.numeric() below converts to a missing value.
Mod_Dataset$profile_gross_household <- recode_factor(
  Mod_Dataset$profile_gross_household,
  "under £5,000 per year" = "1",
  "£5,000 to £9,999 per year" = "2",
  "£10,000 to £14,999 per year" = "3",
  "£15,000 to £19,999 per year" = "4",
  "£20,000 to £24,999 per year" = "5",
  "£25,000 to £29,999 per year" = "6",
  "£30,000 to £34,999 per year" = "7",
  "£35,000 to £39,999 per year" = "8",
  "£40,000 to £44,999 per year" = "9",
  "£45,000 to £49,999 per year" = "10",
  "£50,000 to £59,999 per year" = "11",
  "£60,000 to £69,999 per year" = "12",
  "£70,000 to £99,999 per year" = "13",
  "£100,000 to £149,999 per year" = "14",
  "£150,000 and over" = "15",
  "Don't know" = "NA",
  "Prefer_not_to_answer" = "NA"
)

Mod_Dataset$profile_gross_household <-
  as.numeric(as.character(Mod_Dataset$profile_gross_household))

# Standardise columns 6 to 197: scale() centres each column and divides it by
# its standard deviation; c() flattens the one-column matrix it returns back
# into a plain vector. A plain function is used because the funs() wrapper is
# deprecated in dplyr.
# NOTE(review): the columns are selected by position - presumably these are
# the retweet variables; confirm the range still matches the CSV layout.
# NOTE(review): the regression-table notes describe the retweets as
# "standardised from 0 to 1", whereas scale() yields z-scores - confirm wording.
Mod_Dataset <- Mod_Dataset %>%
  mutate_at(6:197, function(x) c(scale(x)))


```



```{r, probit with lead variables retweets }
# Probit specification with the 1-step lead retweet measures: binomial GLM
# (probit link) of DEM_SAT on eight topic leads, the respondent-level controls
# and created_at (per the original note, the week fixed effect - confirm).
# The Daily_News, Enterteinment, Puppy_News and Weather leads are not included.
probit_retweets_lead <- glm(
  DEM_SAT ~
    lead_retweets_Climate_Change +
    lead_retweets_Covid +
    lead_retweets_Crime +
    lead_retweets_Royal_Family +
    lead_retweets_Sports +
    lead_retweets_Uk_Politics +
    lead_retweets_US_Politics +
    lead_retweets_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = binomial(link = "probit")
)

# Print the coefficient table of the fitted model.
summary(probit_retweets_lead)

```

```{r, probit with lead by 3 day variables retweets}

# Probit specification with the 3-day lead retweet measures: binomial GLM
# (probit link) of DEM_SAT on eight topic leads, the respondent-level controls
# and created_at (per the original note, the week fixed effect - confirm).
# The Daily_News, Enterteinment, Puppy_News and Weather leads are not included.
probit_retweets_3_lead <- glm(
  DEM_SAT ~
    lead_retweets_3_Climate_Change +
    lead_retweets_3_Covid +
    lead_retweets_3_Crime +
    lead_retweets_3_Royal_Family +
    lead_retweets_3_Sports +
    lead_retweets_3_Uk_Politics +
    lead_retweets_3_US_Politics +
    lead_retweets_3_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = binomial(link = "probit")
)

# Print the coefficient table of the fitted model.
summary(probit_retweets_3_lead)



```

```{r, probit with lead by 5 day variables retweets}

# Probit specification with the 5-day lead retweet measures: binomial GLM
# (probit link) of DEM_SAT on eight topic leads, the respondent-level controls
# and created_at (per the original note, the week fixed effect - confirm).
# The Daily_News, Enterteinment, Puppy_News and Weather leads are not included.
probit_retweets_5_lead <- glm(
  DEM_SAT ~
    lead_retweets_5_Climate_Change +
    lead_retweets_5_Covid +
    lead_retweets_5_Crime +
    lead_retweets_5_Royal_Family +
    lead_retweets_5_Sports +
    lead_retweets_5_Uk_Politics +
    lead_retweets_5_US_Politics +
    lead_retweets_5_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = binomial(link = "probit")
)

# Print the coefficient table of the fitted model.
summary(probit_retweets_5_lead)



```

```{r, probit with lead by 7 day variables retweets}

# Probit specification with the 7-day lead retweet measures: binomial GLM
# (probit link) of DEM_SAT on eight topic leads, the respondent-level controls
# and created_at (per the original note, the week fixed effect - confirm).
# The Daily_News, Enterteinment, Puppy_News and Weather leads are not included.
probit_retweets_7_lead <- glm(
  DEM_SAT ~
    lead_retweets_7_Climate_Change +
    lead_retweets_7_Covid +
    lead_retweets_7_Crime +
    lead_retweets_7_Royal_Family +
    lead_retweets_7_Sports +
    lead_retweets_7_Uk_Politics +
    lead_retweets_7_US_Politics +
    lead_retweets_7_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = binomial(link = "probit")
)

# Print the coefficient table of the fitted model.
summary(probit_retweets_7_lead)

```

```{r, probit with lead by 10 day variables retweets}

# Probit specification with the 10-day lead retweet measures: binomial GLM
# (probit link) of DEM_SAT on eight topic leads, the respondent-level controls
# and created_at (per the original note, the week fixed effect - confirm).
# The Daily_News, Enterteinment, Puppy_News and Weather leads are not included.
probit_retweets_10_lead <- glm(
  DEM_SAT ~
    lead_retweets_10_Climate_Change +
    lead_retweets_10_Covid +
    lead_retweets_10_Crime +
    lead_retweets_10_Royal_Family +
    lead_retweets_10_Sports +
    lead_retweets_10_Uk_Politics +
    lead_retweets_10_US_Politics +
    lead_retweets_10_Violence +
    profile_gross_household +
    lastvote +
    pastvote_EURef +
    age +
    profile_gender +
    profile_education_level +
    profile_GOR +
    leftpaper +
    political_attention +
    created_at,
  data = Mod_Dataset,
  family = binomial(link = "probit")
)

# Print the coefficient table of the fitted model.
summary(probit_retweets_10_lead)



```


```{r, table leads results, results='asis'}

# LaTeX table for the 1-day-lead probit model. Terms matching the `omit`
# patterns are hidden and summarised by the "Week FE" / "Controls" rows.
# The note is written as a single string with one "\n", exactly like the
# other tables in this chunk; the earlier version also contained a literal
# line break plus indentation, which put a whitespace-only line into the
# generated LaTeX.
stargazer(probit_retweets_lead, title = "Probit Regression",
          covariate.labels = c("1 Day Lead\nClimate Change RT",
                               "1 Day Lead\nCovid RT","1 Day Lead\nCrime RT",
                               "1 Day Lead\nRoyal Family\nRT",
                               "1 Day Lead\nSport RT", "1 Day Lead\nUK Politics\nRT",
                               "1 Day Lead\nUS Politics\nRT","1 Day Lead\nViolence RT"
                               ),
          omit= c("Constant", "age", "profile", "created", "lastvoteOther",
                  "pastvote","lastvote","political","leftpaper"),
          digits = 2, align= TRUE, add.lines = list(c("Week FE", "Yes"),
                                                    c("Controls","Yes")),
          dep.var.labels = c("Satisfaction with Democracy"), font.size = "small",
          header=FALSE,
          notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.", type = "latex", column.sep.width = "-45pt")


# LaTeX tables for the 3-, 5-, 7- and 10-day-lead probit models. All four
# calls share the same layout: eight topic labels, the control terms hidden
# through `omit`, and "Week FE" / "Controls" indicator rows.

# 3-day lead
stargazer(
  probit_retweets_3_lead,
  title = "Probit Regression II",
  covariate.labels = c(
    "3 Day Lead\nClimate Change RT",
    "3 Day Lead\nCovid RT",
    "3 Day Lead\nCrime RT",
    "3 Day Lead\nRoyal Family\nRT",
    "3 Day Lead\nSport RT",
    "3 Day Lead\nUK Politics\nRT",
    "3 Day Lead\nUS Politics\nRT",
    "3 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(c("Week FE", "Yes"), c("Controls", "Yes")),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

# 5-day lead
stargazer(
  probit_retweets_5_lead,
  title = "Probit Regression III",
  covariate.labels = c(
    "5 Day Lead\nClimate Change RT",
    "5 Day Lead\nCovid RT",
    "5 Day Lead\nCrime RT",
    "5 Day Lead\nRoyal Family\nRT",
    "5 Day Lead\nSport RT",
    "5 Day Lead\nUK Politics\nRT",
    "5 Day Lead\nUS Politics\nRT",
    "5 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(c("Week FE", "Yes"), c("Controls", "Yes")),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

# 7-day lead
stargazer(
  probit_retweets_7_lead,
  title = "Probit Regression IV",
  covariate.labels = c(
    "7 Day Lead\nClimate Change RT",
    "7 Day Lead\nCovid RT",
    "7 Day Lead\nCrime RT",
    "7 Day Lead\nRoyal Family\nRT",
    "7 Day Lead\nSport RT",
    "7 Day Lead\nUK Politics\nRT",
    "7 Day Lead\nUS Politics\nRT",
    "7 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(c("Week FE", "Yes"), c("Controls", "Yes")),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)

# 10-day lead
stargazer(
  probit_retweets_10_lead,
  title = "Probit Regression V",
  covariate.labels = c(
    "10 Day Lead\nClimate Change RT",
    "10 Day Lead\nCovid RT",
    "10 Day Lead\nCrime RT",
    "10 Day Lead\nRoyal Family\nRT",
    "10 Day Lead\nSport RT",
    "10 Day Lead\nUK Politics\nRT",
    "10 Day Lead\nUS Politics\nRT",
    "10 Day Lead\nViolence RT"
  ),
  omit = c(
    "Constant", "age", "profile", "created", "lastvoteOther",
    "pastvote", "lastvote", "political", "leftpaper"
  ),
  digits = 2,
  align = TRUE,
  add.lines = list(c("Week FE", "Yes"), c("Controls", "Yes")),
  dep.var.labels = c("Satisfaction with Democracy"),
  font.size = "small",
  header = FALSE,
  notes = "Retweets are all standardised from 0 to 1.\nCoefficients should be interpreted accordingly.",
  type = "latex",
  column.sep.width = "-45pt"
)
```

# Appendix Plots

```{r, Sentiment of various topics, eval= FALSE}
#glimpse(Tweets)

#Tweets_Sent_Sum<- Tweets %>%
#  group_by(created_at.x, topic) %>%
#  summarise(mean_sent= mean(sentiment_value)) %>%
#  ungroup()

library(lubridate)

# Work on a copy of the tweet-level data and turn the tweet date into a Date.
Tweets_Sent_Sum <- Tweets
Tweets_Sent_Sum$created_at.x <- as.Date(as.character(Tweets_Sent_Sum$created_at.x))

# Mean sentiment per topic within 7-day bins, followed by display names for
# the topics.
Tweets_Sent_Sum <- Tweets_Sent_Sum %>%
  dplyr::group_by(week = floor_date(created_at.x, "7 days"), topic) %>%
  summarise(mean_sentiment = mean(sentiment_value)) %>%
  ungroup() %>%
  mutate(
    topic = fct_recode(
      topic,
      "Climate Change" = "Climate_Change",
      "Daily News" = "Daily_News",
      "Entertainment" = "Enterteinment",
      "Animals and Pets" = "Puppy_News",
      "Royal Family" = "Royal_Family",
      "UK Politics" = "Uk_Politics",
      "US Politics" = "US_Politics",
      "Political Violence" = "Violence"
    )
  )

# One panel per topic; the dashed red line marks a sentiment of zero.
ALL_Tweets_Sentiment <- ggplot(Tweets_Sent_Sum, aes(week, mean_sentiment)) +
  geom_line() +
  facet_wrap(~ topic, ncol = 3) +
  labs(
    title = "Average Sentiment across Topics",
    x = "",
    y = "Average Sentiment"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(size = 25)
  ) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  scale_x_date(labels = date_format("%b %y")) +
  ylim(-3, 3)

ALL_Tweets_Sentiment

# Save the figure into the repository's data-viz folder.
setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz/")

ggsave(
  "all_tweets_sentiments_plot.png",
  ALL_Tweets_Sentiment,
  height = 10,
  width = 12
)

```

```{r, sentiment of topics by newspaper}


# Mean tweet sentiment per topic and newspaper with a t-based confidence
# interval, plotted as one panel per topic and saved to the data-viz folder.
# NOTE(review): `Tweets` is read in a chunk marked eval = FALSE near the top of
# the document, while this chunk is evaluated - confirm it is available at
# knit time.
Tweets_Sent_Sum <- Tweets

library(lubridate)

# Date as Date format
Tweets_Sent_Sum$created_at.x <- as.Date(as.character(Tweets_Sent_Sum$created_at.x))

# Mean plus lower / upper bound of the (1 - alpha) confidence interval,
# using the t quantile with n - 1 degrees of freedom.
alpha <- 0.05

Tweets_Sent_Sum1 <- Tweets_Sent_Sum %>%
  dplyr::group_by(topic, newspaper) %>%
  summarize(
    mean = mean(sentiment_value),
    lower = mean(sentiment_value) -
      qt(1 - alpha / 2, (n() - 1)) * sd(sentiment_value) / sqrt(n()),
    upper = mean(sentiment_value) +
      qt(1 - alpha / 2, (n() - 1)) * sd(sentiment_value) / sqrt(n())
  ) %>%
  # summarize() only drops the last grouping level, so the result would stay
  # grouped by `topic`; dplyr refuses to mutate a grouping column, which the
  # recode below needs to do.
  ungroup()

# Display names for the topics.
Tweets_Sent_Sum1 <- Tweets_Sent_Sum1 %>%
  mutate(
    topic = fct_recode(
      topic,
      "Climate Change" = "Climate_Change",
      "Daily News" = "Daily_News",
      "Entertainment" = "Enterteinment",
      "Animals and Pets" = "Puppy_News",
      "Royal Family" = "Royal_Family",
      "UK Politics" = "Uk_Politics",
      "US Politics" = "US_Politics",
      "Political Violence" = "Violence"
    )
  )

# Point estimate with error bars per newspaper; the dashed red line marks a
# sentiment of zero.
ALL_Tweets_Sentiment_Newspaper <- ggplot(
  Tweets_Sent_Sum1,
  aes(x = newspaper, y = mean, colour = topic)
) +
  geom_errorbar(aes(ymin = lower, ymax = upper), colour = "black", width = .5) +
  geom_line() +
  geom_point(size = 1) +
  facet_wrap(~ topic) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    x = "",
    y = "Mean Sentiment",
    title = "Mean Sentiment of Tweets by Topic for All Newspapers"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90), text = element_text(size = 25)) +
  theme(legend.position = "none") +
  ylim(-3, 3)


setwd("/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz/")

ggsave("all_tweets_sentiments_by_newspaper_plot.png", ALL_Tweets_Sentiment_Newspaper, height = 10, width = 12)

```

```{r, calculate change in coverage by newspaper ideology, eval= FALSE}

# Label each newspaper with an ideology, then keep only the left / right wing
# papers (bbc_news and ft are coded "None") and only tweets whose
# Type_Sentiment is "Negative_Topic".
Tweets_Ideology <- Tweets %>%
  mutate(
    newspaper_ideology = fct_recode(
      newspaper,
      "None" = "bbc_news",
      "Right Wing Newspaper" = "daily_mail",
      "None" = "ft",
      "Left Wing Newspaper" = "independent",
      "Right Wing Newspaper" = "sun",
      "Right Wing Newspaper" = "telegraph",
      "Right Wing Newspaper" = "times",
      "Left Wing Newspaper" = "guardian",
      "Left Wing Newspaper" = "daily_mirror"
    )
  ) %>%
  filter(
    newspaper_ideology != "None",
    Type_Sentiment == "Negative_Topic"
  )

# Daily tweet counts per ideology and each ideology's share (in percent) of
# that day's total.
Tweets_Ideology_Sum <- Tweets_Ideology %>%
  group_by(created_at.x, newspaper_ideology) %>%
  summarise(n = n()) %>%
  group_by(created_at.x) %>%
  mutate(perc = 100 * n / sum(n))

# Parse the day into a Date so it can be binned into weeks.
Tweets_Ideology_Sum$created_at.x <-
  as.Date(as.character(Tweets_Ideology_Sum$created_at.x))

# Attach the weekly mean of the daily shares to every row of that week and
# ideology (rows are kept, not collapsed).
Tweets_Ideology_Sum <- Tweets_Ideology_Sum %>%
  dplyr::group_by(
    week = floor_date(created_at.x, "7 days"),
    newspaper_ideology
  ) %>%
  mutate(perc_week = mean(perc))

# One line per ideology over time.
ALL_Tweets_Topics <- ggplot(
  Tweets_Ideology_Sum,
  aes(week, perc_week, col = newspaper_ideology)
) +
  geom_line() +
  labs(
    title = "Change in Negative Topics Covered",
    x = "",
    y = "Percentage of Negative Coverage Tweets"
  ) +
  theme_bw() +
  scale_x_date(labels = date_format("%b %y")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ALL_Tweets_Topics

```



# NON-USED Regression Discontinuity

```{r, upload data}

# Load the combined survey / tweet file for the (unused) event analyses and
# prepare the reduced data set `Diff_Diff_Data_Sel`.
setwd("/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/")

# Omar Windows upload.
#setwd("C:/Users/omarh/OneDrive - London School of Economics/Satisfaction_Democracy_Project")

Diff_Diff_Data <- read.csv("combined_survey_tweets.csv", stringsAsFactors = TRUE)

# Keep the relevant variables, drop rows whose newspaper is "ft" and collapse
# the remaining outlets into leftwing / rightwing / non readers.
Diff_Diff_Data_Sel <- Diff_Diff_Data %>%
  dplyr::select(newspaper, created_at, DEM_SAT, weight, lastvote) %>%
  filter(newspaper != "ft") %>%
  mutate(
    newspaper_readership = fct_recode(
      newspaper,
      "leftwing" = "guardian",
      "leftwing" = "independent",
      "leftwing" = "daily_mirror",
      "rightwing" = "daily_mail",
      "rightwing" = "sun",
      "rightwing" = "telegraph",
      "rightwing" = "times",
      "non readers" = "bbc_news"
    )
  )

#glimpse(Diff_Diff_Data_Sel)

# Parse created_at (read as a factor) straight into a Date. This replaces the
# earlier strptime() -> POSIXct -> character -> Date round trip, which stored
# a POSIXlt object in the data frame and went through the local time zone.
Diff_Diff_Data_Sel$created_at <- as.Date(
  as.character(Diff_Diff_Data_Sel$created_at),
  format = "%Y-%m-%d"
)


```


```{r, George Floyd}

# crosstab() below comes from pollster, so the package has to be attached
# before the weighted table is built.
library(pollster)

# Event window around 25 May 2020 (George Floyd): 15 May to 5 June 2020,
# excluding the day of the event itself. Both readers and non-readers.
George_Floyd <- Diff_Diff_Data_Sel %>%
  filter(created_at >= "2020-05-15" & created_at <= "2020-06-05") %>%
  filter(created_at != "2020-05-25")

# Daily counts by DEM_SAT answer and readership group; only cells with more
# than 25 respondents and DEM_SAT == 1 are kept.
# NOTE(review): after summarise() the data remain grouped by created_at and
# DEM_SAT, so `perc` is each readership group's share among that day's
# respondents with the same DEM_SAT answer - confirm this is intended rather
# than the share with DEM_SAT == 1 within each readership group.
George_Floyd_Summ <- George_Floyd %>%
  group_by(created_at, DEM_SAT, newspaper_readership) %>%
  summarise(n = n()) %>%
  mutate(perc = n / sum(n)) %>%
  filter(n > 25) %>%
  filter(DEM_SAT == 1)

# Make sure the x variable is a Date (no POSIXlt detour needed).
George_Floyd_Summ$created_at <- as.Date(as.character(George_Floyd_Summ$created_at))

ggplot(George_Floyd_Summ, aes(created_at, perc, col = as.factor(newspaper_readership))) +
  geom_line() +
  geom_vline(xintercept = as.Date("2020-05-25"))


# Same summary, split by last vote instead of readership.
George_Floyd_Summ <- George_Floyd %>%
  group_by(created_at, DEM_SAT, lastvote) %>%
  summarise(n = n()) %>%
  mutate(perc = n / sum(n)) %>%
  filter(n > 25) %>%
  filter(DEM_SAT == 1)

ggplot(George_Floyd_Summ, aes(created_at, perc, col = as.factor(lastvote))) +
  geom_line() +
  geom_vline(xintercept = as.Date("2020-05-25"))


# alternative: weighted share per day, keeping only day / DEM_SAT cells with
# at least 25 respondents.
George_Floyd <- George_Floyd %>%
  group_by(DEM_SAT, created_at) %>%
  filter(n() >= 25) %>%
  ungroup()

George_Floyd_Weight <- crosstab(
  df = George_Floyd,
  x = created_at,
  y = DEM_SAT,
  weight = weight,
  format = "long"
)

George_Floyd_Weight <- George_Floyd_Weight %>% filter(DEM_SAT == 1)

# The crosstab is built from created_at and DEM_SAT only, so it carries no
# `newspaper_readership` column, and pollster reports the percentage as `pct`
# (TODO confirm against the installed pollster version). The date is converted
# back to Date in case crosstab() returned it as a factor.
George_Floyd_Weight$created_at <-
  as.Date(as.character(George_Floyd_Weight$created_at))

ggplot(George_Floyd_Weight, aes(created_at, pct)) +
  geom_line() +
  geom_vline(xintercept = as.Date("2020-05-25"))


######################



# Event window around 6 January 2021 (Capitol Hill; the date marked by the
# vertical line in the plot): 1 to 15 January 2021, excluding the day of the
# event itself. Both readers and non-readers. Only day / DEM_SAT cells with
# at least 25 respondents are kept.
Capitol_Hill <- Diff_Diff_Data_Sel %>%
  filter(created_at >= "2021-01-01" & created_at <= "2021-01-15") %>%
  filter(created_at != "2021-01-06") %>%
  group_by(DEM_SAT, created_at) %>%
  filter(n() >= 25) %>%
  ungroup()


library(pollster)

#Capitol_Hill_Weight<- crosstab(df= Capitol_Hill, x = DEM_SAT, y=created_at, weight= weight, format = "long")


# Daily counts by DEM_SAT answer and readership group; only cells with at
# least 25 respondents and DEM_SAT == 1 are kept.
# NOTE(review): after summarise() the data remain grouped by created_at and
# DEM_SAT, so `perc` is each readership group's share among that day's
# respondents with the same DEM_SAT answer - confirm this is intended.
Capitol_Hill_Summ <- Capitol_Hill %>%
  group_by(created_at, DEM_SAT, newspaper_readership) %>%
  summarise(n = n()) %>%
  mutate(perc = n / sum(n)) %>%
  filter(n >= 25) %>%
  filter(DEM_SAT == 1)

# Make sure the x variable is a Date (no POSIXlt detour needed).
Capitol_Hill_Summ$created_at <- as.Date(as.character(Capitol_Hill_Summ$created_at))

ggplot(Capitol_Hill_Summ, aes(created_at, perc, col = as.factor(newspaper_readership))) +
  geom_line() +
  geom_vline(xintercept = as.Date("2021-01-06"))
```

```{r, dominic cummings}

# Event window around 22 May 2020 (Dominic Cummings; the date marked by the
# vertical line in the plot): 13 May to 2 June 2020, excluding the day of the
# event itself. Both readers and non-readers.
Dominic_Cummings <- Diff_Diff_Data_Sel %>%
  filter(created_at >= "2020-05-13" & created_at <= "2020-06-02") %>%
  filter(created_at != "2020-05-22")

# Daily counts by DEM_SAT answer and readership group; only cells with more
# than 25 respondents and DEM_SAT == 1 are kept.
# NOTE(review): after summarise() the data remain grouped by created_at and
# DEM_SAT, so `perc` is each readership group's share among that day's
# respondents with the same DEM_SAT answer - confirm this is intended.
Dominic_Cummings_Summ <- Dominic_Cummings %>%
  group_by(created_at, DEM_SAT, newspaper_readership) %>%
  summarise(n = n()) %>%
  mutate(perc = n / sum(n)) %>%
  filter(n > 25) %>%
  filter(DEM_SAT == 1)

# Make sure the x variable is a Date (no POSIXlt detour needed).
Dominic_Cummings_Summ$created_at <-
  as.Date(as.character(Dominic_Cummings_Summ$created_at))

ggplot(Dominic_Cummings_Summ, aes(created_at, perc, col = as.factor(newspaper_readership))) +
  geom_line() +
  geom_vline(xintercept = as.Date("2020-05-22"))


```


# Further analysis of change in content over time (Xavi, 1 August 2022)

# Classification of topics as negative

```{r, classify topics as negative or positive, eval= FALSE}

# The chunk label is unique: the previous label repeated the one used by the
# "calculate change in coverage by newspaper ideology" chunk in the appendix
# plots, and knitr stops on duplicate chunk labels.

# Identifying negative vs neutral/positive topics: mean sentiment per topic
# (output column `name`).
Tweets %>%
  group_by(topic) %>%
  summarise(name = mean(sentiment_value))

# Recode those with an average sentiment below -0.5 as negative and
# significantly negative for all newspapers (threshold as stated by the
# authors; the assignment itself is hard-coded below).
Tweets <- Tweets %>%
  mutate(
    topic_sentiment = fct_recode(
      topic,
      "Negative" = "Climate_Change",
      "Negative" = "Covid",
      "Negative" = "Crime",
      "Negative" = "Daily_News",
      "Positive" = "Enterteinment",
      "Positive" = "Puppy_News",
      "Positive" = "Royal_Family",
      "Positive" = "Sports",
      "Negative" = "Uk_Politics",
      "Negative" = "US_Politics",
      "Negative" = "Violence",
      "Negative" = "Weather"
    )
  )


# Give ideology to newspapers (bbc_news and ft are coded "None").
Tweets_Ideology <- Tweets %>%
  mutate(
    newspaper_ideology = fct_recode(
      newspaper,
      "None" = "bbc_news",
      "Right Wing Newspaper" = "daily_mail",
      "None" = "ft",
      "Left Wing Newspaper" = "independent",
      "Right Wing Newspaper" = "sun",
      "Right Wing Newspaper" = "telegraph",
      "Right Wing Newspaper" = "times",
      "Left Wing Newspaper" = "guardian",
      "Left Wing Newspaper" = "daily_mirror"
    )
  )


```

# Average sentiment over time

```{r, average sentiment over time by newspaper and ideology}

# The chunk label is unique: the previous label was identical to the label of
# the following "Distribution of topics over time" chunk, and knitr stops on
# duplicate chunk labels.

# Give ideology to newspapers (bbc_news and ft are coded "None").
Tweets_Ideology <- Tweets %>%
  mutate(
    newspaper_ideology = fct_recode(
      newspaper,
      "None" = "bbc_news",
      "Right Wing Newspaper" = "daily_mail",
      "None" = "ft",
      "Left Wing Newspaper" = "independent",
      "Right Wing Newspaper" = "sun",
      "Right Wing Newspaper" = "telegraph",
      "Right Wing Newspaper" = "times",
      "Left Wing Newspaper" = "guardian",
      "Left Wing Newspaper" = "daily_mirror"
    )
  )

# Filter out bbc and FT
Tweets_Ideology <- Tweets_Ideology %>%
  filter(newspaper_ideology != "None") #%>%
  #drop.levels()

# Average sentiment by newspaper and day, weighted by retweet count
Sentiment_Ideology_Sum <- Tweets_Ideology %>%
  group_by(created_at.x, newspaper) %>%
  summarise(
    average_sentiment = weighted.mean(sentiment_value, public_metrics.retweet_count)
  ) %>%
  ungroup()

# Date as Date format
Sentiment_Ideology_Sum$created_at.x <-
  as.Date(as.character(Sentiment_Ideology_Sum$created_at.x))

# Plot smoothed average sentiment, one panel per newspaper
Figure_Sentiment_Ideology_Sum <- ggplot(
  data = Sentiment_Ideology_Sum,
  aes(x = created_at.x, y = average_sentiment)
) +
  geom_smooth(method = "loess") +
  facet_wrap(~ newspaper) +
  labs(
    title = "Average Sentiment across Newspapers",
    x = "",
    y = "Average Sentiment"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(labels = date_format("%b %y"))

Figure_Sentiment_Ideology_Sum


# Average sentiment by newspaper ideology and day, weighted by retweet count
Sentiment_Ideology_Sum <- Tweets_Ideology %>%
  group_by(created_at.x, newspaper_ideology) %>%
  summarise(
    average_sentiment = weighted.mean(sentiment_value, public_metrics.retweet_count)
  ) %>%
  ungroup()

# Date as Date format
Sentiment_Ideology_Sum$created_at.x <-
  as.Date(as.character(Sentiment_Ideology_Sum$created_at.x))

# Plot smoothed average sentiment, one line per newspaper ideology.
# NOTE(review): the title mentions the Royal Family, but no topic filter is
# applied in this chunk - confirm whether a filter or a different title is
# intended.
Figure_Sentiment_Ideology_Sum <- ggplot(
  data = Sentiment_Ideology_Sum,
  aes(x = created_at.x, y = average_sentiment, col = newspaper_ideology)
) +
  geom_smooth(method = "loess") +
  labs(
    title = "Average Sentiment around Royal Family across Newspapers",
    x = "",
    y = "Average Sentiment"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(labels = date_format("%b %y"))

Figure_Sentiment_Ideology_Sum




```

# Distribution of topics over time

```{r, average sentiment across newspaper groups repeated}

# The chunk label is unique on purpose: knitr stops with "Duplicate chunk
# label" when two chunks share a label, and the original label was already
# used by the chunk under "Average sentiment over time".
# NOTE(review): the code below repeats that earlier chunk and plots average
# sentiment, not the distribution of topics announced by the heading above --
# confirm whether this chunk should be replaced or removed.

# Give ideology to newspapers (the BBC and the FT are labelled "None")
Tweets_Ideology <- Tweets %>%
  mutate(newspaper_ideology = fct_recode(newspaper,
                             "None" = "bbc_news",
                             "Right Wing Newspaper" = "daily_mail",
                             "None" = "ft",
                             "Left Wing Newspaper" = "independent",
                             "Right Wing Newspaper" = "sun",
                             "Right Wing Newspaper" = "telegraph",
                             "Right Wing Newspaper" = "times",
                             "Left Wing Newspaper" = "guardian",
                             "Left Wing Newspaper" = "daily_mirror"))

# Filter out bbc and FT (the unused "None" level stays in the factor)
Tweets_Ideology <- Tweets_Ideology %>%
  filter(newspaper_ideology != "None")

# Retweet-weighted average sentiment by newspaper and day
Sentiment_Ideology_Sum <- Tweets_Ideology %>%
  group_by(created_at.x, newspaper) %>%
  summarise(average_sentiment = weighted.mean(sentiment_value,
                                              public_metrics.retweet_count)) %>%
  ungroup()

# Date as Date format (needed by scale_x_date)
Sentiment_Ideology_Sum$created_at.x <- as.Date(as.character(Sentiment_Ideology_Sum$created_at.x))

# Plot loess-smoothed average sentiment, one panel per newspaper
Figure_Sentiment_Ideology_Sum <- ggplot(data = Sentiment_Ideology_Sum,
                                        aes(x = created_at.x, y = average_sentiment)) +
  geom_smooth(method = "loess") +
  facet_wrap(~ newspaper) +
  labs(title = "Average Sentiment across Newspapers", x = "",
       y = "Average Sentiment") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(labels = date_format("%b %y"))

Figure_Sentiment_Ideology_Sum


# Retweet-weighted average sentiment by newspaper ideology and day
# (overwrites the per-newspaper summary computed above)
Sentiment_Ideology_Sum <- Tweets_Ideology %>%
  group_by(created_at.x, newspaper_ideology) %>%
  summarise(average_sentiment = weighted.mean(sentiment_value,
                                              public_metrics.retweet_count)) %>%
  ungroup()

# Date as Date format (needed by scale_x_date)
Sentiment_Ideology_Sum$created_at.x <- as.Date(as.character(Sentiment_Ideology_Sum$created_at.x))

# Plot loess-smoothed average sentiment, one coloured line per ideology.
# NOTE(review): the title mentions the Royal Family but no topic filter is
# applied in this chunk -- confirm that Tweets is already restricted upstream.
Figure_Sentiment_Ideology_Sum <- ggplot(data = Sentiment_Ideology_Sum,
                                        aes(x = created_at.x, y = average_sentiment,
                                            col = newspaper_ideology)) +
  geom_smooth(method = "loess") +
  labs(title = "Average Sentiment around Royal Family across Newspapers", x = "",
       y = "Average Sentiment") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(labels = date_format("%b %y"))

Figure_Sentiment_Ideology_Sum


```

# Topic salience by newspaper ideology

```{r, Salience of various topics, eval= FALSE}

# Give ideology to newspapers (the BBC and the FT are labelled "None")
Tweets_Sum <- Tweets %>%
  mutate(newspaper_ideology = fct_recode(newspaper,
                             "None" = "bbc_news",
                             "Right Wing Newspaper" = "daily_mail",
                             "None" = "ft",
                             "Left Wing Newspaper" = "independent",
                             "Right Wing Newspaper" = "sun",
                             "Right Wing Newspaper" = "telegraph",
                             "Right Wing Newspaper" = "times",
                             "Left Wing Newspaper" = "guardian",
                             "Left Wing Newspaper" = "daily_mirror"))

# Filter out bbc and FT (the unused "None" level stays in the factor)
Tweets_Sum <- Tweets_Sum %>%
  filter(newspaper_ideology != "None")

# Daily share (%) of each topic among the tweets of each ideological camp
Tweets_Sum <- Tweets_Sum %>%
  group_by(created_at.x, topic, newspaper_ideology) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(created_at.x, newspaper_ideology) %>%
  mutate(perc = 100 * n / sum(n)) %>%
  ungroup() %>%
  # Date as Date format (needed by floor_date and scale_x_date)
  mutate(created_at.x = as.Date(as.character(created_at.x)))

# Weekly mean of the daily shares, attached to every daily row.
# The unit must be "week": floor_date(x, "7 days") bins by day of month
# (1, 8, 15, 22, 29) and restarts every month, so it does not give calendar
# weeks. week_start = 1 makes weeks start on Monday regardless of the
# lubridate.week.start option.
Tweets_Sum <- Tweets_Sum %>%
  group_by(
    week = lubridate::floor_date(created_at.x, unit = "week", week_start = 1),
    topic,
    newspaper_ideology
  ) %>%
  mutate(perc_week_topic = mean(perc)) %>%
  ungroup()

# Weekly topic salience, one panel per topic and one line per ideology
ALL_Tweets_Topics <- ggplot(Tweets_Sum,
                            aes(week, perc_week_topic, col = newspaper_ideology)) +
  geom_line() +
  facet_wrap(~ topic) +
  labs(title = "Average Issue Salience across Newspapers", x = "",
       y = "Percentage of Tweets each week") +
  theme_bw() +
  scale_x_date(labels = date_format("%b %y")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ALL_Tweets_Topics


#setwd("C:/Users/omarh/Documents/GitHub/Satisfaction_Democracy/data-viz")

#ggsave("all_tweets_topics_plot.png",ALL_Tweets_Topics, height = 9, width = 16)

```

```{r, Salience of positive topics by newspaper ideology, eval= FALSE}

# The chunk label is unique on purpose: knitr stops with "Duplicate chunk
# label" when two chunks share a label, and "Salience of various topics" is
# already used by the chunk above.

# Give ideology to newspapers (the BBC and the FT are labelled "None").
# This adds the newspaper_ideology column to the global Tweets data frame.
Tweets <- Tweets %>%
  mutate(newspaper_ideology = fct_recode(newspaper,
                             "None" = "bbc_news",
                             "Right Wing Newspaper" = "daily_mail",
                             "None" = "ft",
                             "Left Wing Newspaper" = "independent",
                             "Right Wing Newspaper" = "sun",
                             "Right Wing Newspaper" = "telegraph",
                             "Right Wing Newspaper" = "times",
                             "Left Wing Newspaper" = "guardian",
                             "Left Wing Newspaper" = "daily_mirror"))


# Filter out bbc and FT (the unused "None" level stays in the factor)
Tweets_Sum <- Tweets %>%
  filter(newspaper_ideology != "None")

# Daily share (%) of each topic among the tweets of each ideological camp
Tweets_Sum <- Tweets_Sum %>%
  dplyr::group_by(created_at.x, topic, newspaper_ideology) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(created_at.x, newspaper_ideology) %>%
  mutate(freq = 100 * n / sum(n)) %>%
  ungroup()

# Collapse the topics into two sentiment groups.
# NOTE(review): the original comment said topics with an average sentiment
# below -0.5 are the negative ones; that threshold is not computed here, the
# grouping is hard-coded -- confirm against the sentiment analysis.
Tweets_Sum <- Tweets_Sum %>%
  mutate(topic_sentiment = fct_recode(topic,
                                      "Negative" = "Climate_Change",
                                      "Negative" = "Covid",
                                      "Negative" = "Crime",
                                      "Negative" = "Daily_News",
                                      "Positive" = "Enterteinment",
                                      "Positive" = "Puppy_News",
                                      "Positive" = "Royal_Family",
                                      "Positive" = "Sports",
                                      "Negative" = "Uk_Politics",
                                      "Negative" = "US_Politics",
                                      "Negative" = "Violence",
                                      "Negative" = "Weather"))


# Keep the positive topics only and add up their daily shares per ideology.
# NOTE(review): the column is still called percneg to keep the data frame
# schema unchanged, but it holds the share of POSITIVE-topic tweets. Days
# without any positive-topic tweet are absent rather than 0.
Tweets_Sum <- Tweets_Sum %>%
  filter(topic_sentiment == "Positive") %>%
  group_by(created_at.x, newspaper_ideology) %>%
  summarise(percneg = sum(freq), .groups = "drop")

# Date as Date format (needed by scale_x_date)
Tweets_Sum$created_at.x <- as.Date(as.character(Tweets_Sum$created_at.x))



#setwd("/Users/HammoudG/OneDrive - London School of Economics/Satisfaction_Democracy_Project/")

#write_csv(Tweets_Sum, "Database_Tweets_plot_Roberto.csv")


# Loess-smoothed daily share of positive-topic tweets, one line per ideology.
# The axis label describes what is actually plotted: the data are daily shares
# of tweets on topics recoded as "Positive" (there is no "Neutral" group).
ALL_Tweets_Topics <- ggplot(Tweets_Sum,
                            aes(created_at.x, percneg, col = newspaper_ideology)) +
  geom_smooth(method = "loess", alpha = .1) +
  scale_color_manual(values = c("Darkblue", "Darkred")) +
  labs(title = "", x = "",
       y = "Percentage of Tweets on Positive Topics each day") +
  theme_bw() +
  scale_x_date(labels = date_format("%b %y")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ALL_Tweets_Topics



# Save the figure through an explicit path instead of changing the working
# directory with setwd().
# Windows location: "C:/Users/omarh/Documents/GitHub/Satisfaction_Democracy/data-viz"
ggsave(
  file.path(
    path.expand("~/Documents/GitHub/Satisfaction_Democracy/data-viz"),
    "leftright_sentimentsalience.png"
  ),
  ALL_Tweets_Topics,
  height = 9, width = 16
)

```




# Google Trends

```{r, google trends}

# Read the Google Trends export. Paths are spelled out in full so the working
# directory is never changed with setwd().
Google_Trends <- read.csv(
  file.path(
    "/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-raw/Google_Trends",
    "Google_Trends.csv"
  )
)

# Week is stored as day/month/year text; parse it into a Date
Google_Trends$Week <- as.Date(as.character(Google_Trends$Week), format = "%d/%m/%Y")


# Long format: one row per week and search topic. Every column except Week is
# treated as a topic (instead of the positional 2:length(colnames(...)) range,
# which misbehaves when there is only one column).
Google_Trends_L <- pivot_longer(Google_Trends, cols = -Week,
                                names_to = "Topic", values_to = "N")

Google_Trends_L$Topic <- as.factor(Google_Trends_L$Topic)

# Plot search interest over time, one panel per topic
Google_Trends_Plot <- ggplot(data = Google_Trends_L, aes(x = Week, y = N)) +
  geom_line() +
  facet_wrap(~Topic) +
  labs(title = "Google Trends - Search for News in the United Kingdom", x = "",
       y = "Interest Over Time", caption = "Numbers represent search interest relative to the highest point on the chart for\nthe given region and time. A value of 100 is the peak popularity for the term. A value of 50 means\nthat the term is half as popular. A score of 0 means that there was\nnot enough data for this term. Source: Google Trends") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_date(labels = date_format("%b %y"))

Google_Trends_Plot

# Save the figure into the repository's data-viz folder
ggsave(
  file.path(
    "/Users/HammoudG/Documents/GitHub/Satisfaction_Democracy/data-viz",
    "Google_Trends_Plot.png"
  ),
  Google_Trends_Plot,
  height = 4, width = 7
)
```